学习了,这个确实能加速 ~
TicToc float_time4("float multiply 4");
for (size_t i = 0; i < Nf; i += 4) {
result_float[i] = float(i) * pi_float;
result_float[i+1] = float(i) * pi_float;
result_float[i+2] = float(i) * pi_float;
result_float[i+3] = float(i) * pi_float;
}
float_time4.toc();
TicToc double_time4("double multiply 4");
for (size_t i = 0; i < Nd; i += 2) {
result_double[i] = double(i) * pi_double;
result_double[i+1] = double(i) * pi_double;
}
double_time4.toc();
float multiply 1 cost 13 ms.
double multiply 1 cost 11 ms.
float multiply 2 cost 14 ms.
double multiply 2 cost 11 ms.
float multiply 3 cost 6 ms.
double multiply 3 cost 12 ms.
float multiply 4 cost 6 ms.
double multiply 4 cost 11 ms.
--
FROM 14.155.19.*