Right... forgetting the fact that Java has no SIMD? Besides that, the statement "has a throughput of 36 cycles" makes no sense at all.
Benchmark to show that float / double performance is nearly identical:
/**
 * Runs the float and the double benchmark back to back for several rounds and prints the
 * median duration of each, converted from nanoseconds to whole milliseconds.
 *
 * <p>Every round allocates fresh operand arrays and a fresh 16-entry matrix and fills all of
 * them with random values, so the kernels work on real data rather than on
 * zero-initialised arrays.
 *
 * @param args ignored
 */
public static void main(String[] args)
{
    final int elems = 1024;
    final int runs = 16;
    final int rounds = 8;
    Random r = new Random();
    for (int k = 0; k < rounds; k++)
    {
        float[] f1 = new float[elems];
        float[] f2 = new float[elems];
        for (int i = 0; i < elems; i++)
        {
            f1[i] = r.nextFloat();
            f2[i] = r.nextFloat();
        }
        float[] f4x4 = new float[16];
        for (int i = 0; i < f4x4.length; i++)
        {
            f4x4[i] = r.nextFloat();
        }
        // benchFloat reports nanoseconds; only the printed value is in milliseconds.
        long floatNanos = benchFloat(f1, f2, f4x4, elems, runs);
        System.out.println("float performance: " + floatNanos / 1000000L + "ms (midpoint)");

        double[] d1 = new double[elems];
        double[] d2 = new double[elems];
        for (int i = 0; i < elems; i++)
        {
            d1[i] = r.nextDouble();
            d2[i] = r.nextDouble();
        }
        double[] d4x4 = new double[16];
        for (int i = 0; i < d4x4.length; i++)
        {
            d4x4[i] = r.nextDouble();
        }
        long doubleNanos = benchDouble(d1, d2, d4x4, elems, runs);
        System.out.println("double performance: " + doubleNanos / 1000000L + "ms (midpoint)");
    }
}
/**
 * Times repeated parameter sweeps over {@code fiddleFloat} and returns the median sweep time.
 *
 * <p>One timed run calls {@code fiddleFloat} for every combination of 101 values of
 * {@code t1} (0.00 to 1.00 in steps of 0.01) and 51 values of {@code t2} (0.00 to 1.00 in
 * steps of 0.02).
 *
 * @param f1    first operand array, passed through to {@code fiddleFloat}
 * @param f2    second operand array, passed through to {@code fiddleFloat}
 * @param mat   matrix coefficients, passed through to {@code fiddleFloat}
 * @param elems number of elements each {@code fiddleFloat} call processes
 * @param runs  number of timed runs; must be positive
 * @return the element at index {@code runs / 2} of the sorted run durations, in nanoseconds
 * @throws IllegalArgumentException if {@code runs} is not positive
 */
public static long benchFloat(float[] f1, float[] f2, float[] mat, int elems, int runs)
{
    if (runs <= 0)
    {
        throw new IllegalArgumentException("runs must be positive: " + runs);
    }

    // Integer counters pin the amount of work per run. Accumulating a fractional step in a
    // floating-point counter makes the trip count depend on rounding, which can differ
    // between the float and the double benchmark.
    final int t1Steps = 101;
    final int t2Steps = 51;

    long[] ts = new long[runs];
    float checksum = 0.0f;
    for (int run = 0; run < ts.length; run++)
    {
        long start = System.nanoTime();
        for (int s1 = 0; s1 < t1Steps; s1++)
        {
            float t1 = s1 * 0.01f;
            for (int s2 = 0; s2 < t2Steps; s2++)
            {
                float t2 = s2 * 0.02f;
                // Keep the result live so the JIT cannot discard the measured work.
                checksum += fiddleFloat(t1, t2, elems, f1, f2, mat);
            }
        }
        ts[run] = System.nanoTime() - start;
    }

    // Practically never true; it exists only to give the checksum an observable use.
    if (checksum == Float.MIN_VALUE)
    {
        System.out.print("");
    }

    Arrays.sort(ts);
    return ts[ts.length / 2];
}
/**
 * Times repeated parameter sweeps over {@code fiddleDouble} and returns the median sweep time.
 *
 * <p>One timed run calls {@code fiddleDouble} for every combination of 101 values of
 * {@code t1} (0.00 to 1.00 in steps of 0.01) and 51 values of {@code t2} (0.00 to 1.00 in
 * steps of 0.02).
 *
 * @param d1    first operand array, passed through to {@code fiddleDouble}
 * @param d2    second operand array, passed through to {@code fiddleDouble}
 * @param mat   matrix coefficients, passed through to {@code fiddleDouble}
 * @param elems number of elements each {@code fiddleDouble} call processes
 * @param runs  number of timed runs; must be positive
 * @return the element at index {@code runs / 2} of the sorted run durations, in nanoseconds
 * @throws IllegalArgumentException if {@code runs} is not positive
 */
public static long benchDouble(double[] d1, double[] d2, double[] mat, int elems, int runs)
{
    if (runs <= 0)
    {
        throw new IllegalArgumentException("runs must be positive: " + runs);
    }

    // Integer counters pin the amount of work per run. Accumulating a fractional step in a
    // floating-point counter makes the trip count depend on rounding, which can differ
    // between the float and the double benchmark.
    final int t1Steps = 101;
    final int t2Steps = 51;

    long[] ts = new long[runs];
    double checksum = 0.0;
    for (int run = 0; run < ts.length; run++)
    {
        long start = System.nanoTime();
        for (int s1 = 0; s1 < t1Steps; s1++)
        {
            double t1 = s1 * 0.01;
            for (int s2 = 0; s2 < t2Steps; s2++)
            {
                double t2 = s2 * 0.02;
                // Keep the result live so the JIT cannot discard the measured work.
                checksum += fiddleDouble(t1, t2, elems, d1, d2, mat);
            }
        }
        ts[run] = System.nanoTime() - start;
    }

    // Practically never true; it exists only to give the checksum an observable use.
    if (checksum == Double.MIN_VALUE)
    {
        System.out.print("");
    }

    Arrays.sort(ts);
    return ts[ts.length / 2];
}
/**
 * Arithmetic kernel of the float benchmark: folds the first {@code elems} element pairs of
 * {@code op1} and {@code op2} into a single running total.
 *
 * <p>For each pair the kernel blends the two operands twice (first by {@code t1}, then by
 * {@code t2}), adds the blended value to the total, and then applies three four-coefficient
 * rows of {@code m4x4} (indices 0-3, 4-7 and 8-11) with alternating sign. Indices 12-15 of
 * {@code m4x4} are never read.
 *
 * @param t1    weight of the first blend
 * @param t2    weight of the second blend
 * @param elems number of leading elements to process
 * @param op1   first operand array
 * @param op2   second operand array
 * @param m4x4  coefficient array; indices 0-11 are read on every processed element
 * @return the accumulated total; {@code 0.0f} when {@code elems} is not positive
 */
public static float fiddleFloat(float t1, float t2, int elems, float[] op1, float[] op2, float[] m4x4)
{
    float total = 0.0f;
    int idx = 0;
    while (idx < elems)
    {
        final float a = op1[idx];
        final float b = op2[idx];

        // First blend: start at a and move towards b by t1.
        final float firstDelta = b - a;
        final float c = t1 * firstDelta + a;

        // Second blend: start at b and move towards c by t2.
        final float secondDelta = c - b;
        final float blended = t2 * secondDelta + b;
        total = total + blended;

        // Three coefficient rows applied to (a, b, c, 1), subtracted, added, subtracted.
        final float row0 = m4x4[0] * a + m4x4[1] * b + m4x4[2] * c + m4x4[3];
        total = total - row0;
        final float row1 = m4x4[4] * a + m4x4[5] * b + m4x4[6] * c + m4x4[7];
        total = total + row1;
        final float row2 = m4x4[8] * a + m4x4[9] * b + m4x4[10] * c + m4x4[11];
        total = total - row2;

        idx++;
    }
    return total;
}
/**
 * Arithmetic kernel of the double benchmark: folds the first {@code elems} element pairs of
 * {@code op1} and {@code op2} into a single running total.
 *
 * <p>For each pair the kernel blends the two operands twice (first by {@code t1}, then by
 * {@code t2}), adds the blended value to the total, and then applies three four-coefficient
 * rows of {@code m4x4} (indices 0-3, 4-7 and 8-11) with alternating sign. Indices 12-15 of
 * {@code m4x4} are never read.
 *
 * @param t1    weight of the first blend
 * @param t2    weight of the second blend
 * @param elems number of leading elements to process
 * @param op1   first operand array
 * @param op2   second operand array
 * @param m4x4  coefficient array; indices 0-11 are read on every processed element
 * @return the accumulated total; {@code 0.0} when {@code elems} is not positive
 */
public static double fiddleDouble(double t1, double t2, int elems, double[] op1, double[] op2, double[] m4x4)
{
    double total = 0.0;
    int idx = 0;
    while (idx < elems)
    {
        final double a = op1[idx];
        final double b = op2[idx];

        // First blend: start at a and move towards b by t1.
        final double firstDelta = b - a;
        final double c = t1 * firstDelta + a;

        // Second blend: start at b and move towards c by t2.
        final double secondDelta = c - b;
        final double blended = t2 * secondDelta + b;
        total = total + blended;

        // Three coefficient rows applied to (a, b, c, 1), subtracted, added, subtracted.
        final double row0 = m4x4[0] * a + m4x4[1] * b + m4x4[2] * c + m4x4[3];
        total = total - row0;
        final double row1 = m4x4[4] * a + m4x4[5] * b + m4x4[6] * c + m4x4[7];
        total = total + row1;
        final double row2 = m4x4[8] * a + m4x4[9] * b + m4x4[10] * c + m4x4[11];
        total = total - row2;

        idx++;
    }
    return total;
}
float performance: 51ms (midpoint)
double performance: 52ms (midpoint)
float performance: 75ms (midpoint)
double performance: 52ms (midpoint)
float performance: 50ms (midpoint)
double performance: 51ms (midpoint)
float performance: 50ms (midpoint)
double performance: 52ms (midpoint)
float performance: 50ms (midpoint)
double performance: 50ms (midpoint)
float performance: 50ms (midpoint)
double performance: 51ms (midpoint)
float performance: 50ms (midpoint)
double performance: 52ms (midpoint)
float performance: 50ms (midpoint)
double performance: 72ms (midpoint)