是下面这样吗?测试结果几乎没有什么变化……编译时需要加什么 flag 吗?
#include <iostream>
#include <math.h>
#include <chrono>
#include <string.h>
using namespace std;
// Small stopwatch that labels its measurement with a name.
//
// Timing starts when the object is constructed (or when tic() is called again)
// and toc() prints the time elapsed since then to std::cout.
//
// A monotonic clock (steady_clock) is used so that wall-clock adjustments
// happening mid-measurement cannot distort, or even negate, the reported
// interval.
class TicToc
{
public:
    // Stores the label and immediately starts the measurement.
    TicToc(const std::string& name) : m_name(name) { tic(); }

    // (Re)starts the measurement from the current instant.
    void tic() {
        start = std::chrono::steady_clock::now();
    }

    // Prints "<name> cost <elapsed> ms." followed by a flushed newline.
    // The elapsed time is truncated to whole milliseconds by duration_cast.
    // Does not reset the start point, so it may be called repeatedly.
    void toc() const {
        const auto elapsed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
            std::chrono::steady_clock::now() - start);
        std::cout << m_name << " cost " << elapsed_ms.count() << " ms." << std::endl;
    }

private:
    std::string m_name;                           // label printed by toc()
    std::chrono::steady_clock::time_point start;  // instant of the last tic()
};
// Number of elements in each result array.
constexpr size_t N = 20000000;
// N less 4: a stopping bound for loops that advance four floats per step.
constexpr size_t Nf = N - 4;
// N less 2: a stopping bound for loops that advance two doubles per step.
constexpr size_t Nd = N - 2;

// Multipliers are deliberately non-const globals, one per precision.
float pi_float = 3.1415926f;
double pi_double = 3.1415926;

// Output buffers with static storage duration, one per precision.
float result_float[N];
double result_double[N];
int main()
{
memset(result_double, 0, sizeof(result_double));
memset(result_float, 0, sizeof(result_float));
TicToc float_time1("float multiply 1");
for (size_t i = 0; i < N; ++i) {
result_float[i] = float(i) * pi_float;
}
float_time1.toc();
TicToc double_time1("double multiply 1");
for (size_t i = 0; i < N; ++i) {
result_double[i] = double(i) * pi_double;
}
double_time1.toc();
/////////////////////////////////////////
memset(result_double, 0, sizeof(result_double));
memset(result_float, 0, sizeof(result_float));
TicToc float_time2("float multiply 2");
for (size_t i = 0; i < N; ++i) {
result_float[i] = float(i) * pi_float;
}
float_time2.toc();
TicToc double_time2("double multiply 2");
for (size_t i = 0; i < N; ++i) {
result_double[i] = double(i) * pi_double;
}
double_time2.toc();
/////////////////////////////////////////
memset(result_double, 0, sizeof(result_double));
memset(result_float, 0, sizeof(result_float));
TicToc float_time3("float multiply 3");
for (size_t i = 0; i < Nf; i += 4) {
result_float[i] = float(i) * pi_float;
result_float[i+1] = float(i+1) * pi_float;
result_float[i+2] = float(i+2) * pi_float;
result_float[i+3] = float(i+3) * pi_float;
}
float_time3.toc();
TicToc double_time3("double multiply 3");
for (size_t i = 0; i < Nd; i += 2) {
result_double[i] = double(i) * pi_double;
result_double[i+1] = double(i+1) * pi_double;
}
double_time3.toc();
/////////////////////////////////////////
memset(result_double, 0, sizeof(result_double));
memset(result_float, 0, sizeof(result_float));
TicToc float_time4("float multiply 4");
for (size_t i = 0; i < Nf; i += 4) {
result_float[i] = float(i) * pi_float;
result_float[i+1] = float(i+1) * pi_float;
result_float[i+2] = float(i+2) * pi_float;
result_float[i+3] = float(i+3) * pi_float;
}
float_time4.toc();
TicToc double_time4("double multiply 4");
for (size_t i = 0; i < Nd; i += 2) {
result_double[i] = double(i) * pi_double;
result_double[i+1] = double(i+1) * pi_double;
}
double_time4.toc();
}
【 在 foliver 的大作中提到: 】
: 直接把i++变成i+=4。循环里面连续计算4个,编译器就会合并计算。
--
修改:confinement FROM 14.155.19.*
FROM 14.155.19.*