Hello,
I’ve just been looking around for a faster way to do Math.atan2(y,x) and I came across a number of methods. To compare them, I setup a simple JMH benchmark and accuracy test. Criticisms and new atan2 submissions are appreciated.
Recommended implementation with lookup table: Icecore’s
Recommended implementation without lookup table: Kappa’s
Relevant data
Current results:
apache_atan2 : Average Error 0.00000 / Largest Error 0.00000 - Performance 367.945 ns/op
default_atan2 : Average Error 0.00000 / Largest Error 0.00000 - Performance 579.170 ns/op
DSP.atan2_acc : Average Error 0.00344 / Largest Error 1.57079 - Performance 64.812 ns/op
DSP.atan2_fast : Average Error 0.04318 / Largest Error 1.57080 - Performance 51.691 ns/op
Icecore.atan2 : Average Error 0.00004 / Largest Error 0.00010 - Performance 27.291 ns/op
Kappa.atan2 : Average Error 0.00231 / Largest Error 0.00488 - Performance 38.766 ns/op
Riven.atan2 : Average Error 0.00288 / Largest Error 0.00787 - Performance 36.640 ns/op
I posted the raw data below for anyone to interpret for yourselves. The links to the original sources are provided in the comments.
public class ATAN2 {
// Values declared above to avoid the JIT from inlining
public static int nTen = -10;
public static int nFour = -4;
public static int nTwo = -2;
public static int nOne = -1;
public static int pOne = 1;
public static int pThree = 3;
public static int pFour = 4;
public static int pNine = 9;
///////////////////////////////////////
// Default atan2
///////////////////////////////////////
@Benchmark
public double math_default() {
return Math.atan2(pThree, pFour) + Math.atan2(nTwo, pOne) + Math.atan2(nFour, nOne) + Math.atan2(pNine, nTen);
}
///////////////////////////////////////
// Riven's atan2 ( http://www.java-gaming.org/index.php?topic=14647.0 )
///////////////////////////////////////
public static final class Riven {
private static final int ATAN2_BITS = 7;
private static final int ATAN2_BITS2 = ATAN2_BITS << 1;
private static final int ATAN2_MASK = ~(-1 << ATAN2_BITS2);
private static final int ATAN2_COUNT = ATAN2_MASK + 1;
private static final int ATAN2_DIM = (int) Math.sqrt(ATAN2_COUNT);
private static final float INV_ATAN2_DIM_MINUS_1 = 1.0f / (ATAN2_DIM - 1);
private static final float[] atan2 = new float[ATAN2_COUNT];
static {
for (int i = 0; i < ATAN2_DIM; i++) {
for (int j = 0; j < ATAN2_DIM; j++) {
float x0 = (float) i / ATAN2_DIM;
float y0 = (float) j / ATAN2_DIM;
atan2[j * ATAN2_DIM + i] = (float) Math.atan2(y0, x0);
}
}
}
public static final float atan2(float y, float x) {
float add, mul;
if (x < 0.0f) {
if (y < 0.0f) {
x = -x;
y = -y;
mul = 1.0f;
} else {
x = -x;
mul = -1.0f;
}
add = -3.141592653f;
} else {
if (y < 0.0f) {
y = -y;
mul = -1.0f;
} else {
mul = 1.0f;
}
add = 0.0f;
}
float invDiv = 1.0f / (((x < y) ? y : x) * INV_ATAN2_DIM_MINUS_1);
int xi = (int) (x * invDiv);
int yi = (int) (y * invDiv);
return (atan2[yi * ATAN2_DIM + xi] + add) * mul;
}
}
@Benchmark
public double math_riven() {
return Riven.atan2(pThree, pFour) + Riven.atan2(nTwo, pOne) + Riven.atan2(nFour, nOne) + Riven.atan2(pNine, nTen);
}
///////////////////////////////////////
// DSP's atan2 ( http://dspguru.com/dsp/tricks/fixed-point-atan2-with-self-normalization )
///////////////////////////////////////
public static final class DSP {
private static final double coeff_1 = Math.PI / 4;
private static final double coeff_2 = coeff_1 * 3;
public static final double atan2_fast(double y, double x) {
double r;
if (y < 0) {
y = -y;
if (x > 0) {
r = (x - y) / (x + y);
return -(coeff_1 - coeff_1 * r);
} else {
r = (x + y) / (y - x);
return -(coeff_2 - coeff_1 * r);
}
} else {
if (y == 0) {
y = 1.0E-25;
}
if (x > 0) {
r = (x - y) / (x + y);
return coeff_1 - coeff_1 * r;
} else {
r = (x + y) / (y - x);
return coeff_2 - coeff_1 * r;
}
}
}
public static final double atan2_accurate(double y, double x) {
double r;
if (y < 0) {
y = -y;
if (x > 0) {
r = (x - y) / (x + y);
return -(0.1963 * r * r * r - 0.9817 * r + coeff_1);
} else {
r = (x + y) / (y - x);
return -(0.1963 * r * r * r - 0.9817 * r + coeff_2);
}
} else {
if (y == 0) {
y = 1.0E-25;
}
if (x > 0) {
r = (x - y) / (x + y);
return 0.1963 * r * r * r - 0.9817 * r + coeff_1;
} else {
r = (x + y) / (y - x);
return 0.1963 * r * r * r - 0.9817 * r + coeff_2;
}
}
}
}
@Benchmark
public double math_dsp_fast() {
return DSP.atan2_fast(pThree, pFour) + DSP.atan2_fast(nTwo, pOne) + DSP.atan2_fast(nFour, nOne) + DSP.atan2_fast(pNine, nTen);
}
@Benchmark
public double math_dsp_accurate() {
return DSP.atan2_accurate(pThree, pFour) + DSP.atan2_accurate(nTwo, pOne) + DSP.atan2_accurate(nFour, nOne) + DSP.atan2_accurate(pNine, nTen);
}
///////////////////////////////////////
// kappa's atan2 ( http://www.java-gaming.org/topics/extremely-fast-atan2/36467/msg/346112/view.html#msg346112 )
///////////////////////////////////////
public static final class Kappa {
static final float PI = 3.1415927f;
static final float PI_2 = PI / 2f;
static final float MINUS_PI_2 = -PI_2;
public static final float atan2(float y, float x) {
if (x == 0.0f) {
if (y > 0.0f) {
return PI_2;
}
if (y == 0.0f) {
return 0.0f;
}
return MINUS_PI_2;
}
final float atan;
final float z = y / x;
if (Math.abs(z) < 1.0f) {
atan = z / (1.0f + 0.28f * z * z);
if (x < 0.0f) {
return (y < 0.0f) ? atan - PI : atan + PI;
}
return atan;
} else {
atan = PI_2 - z / (z * z + 0.28f);
return (y < 0.0f) ? atan - PI : atan;
}
}
}
@Benchmark
public double math_kappa() {
return Kappa.atan2(pThree, pFour) + Kappa.atan2(nTwo, pOne) + Kappa.atan2(nFour, nOne) + Kappa.atan2(pNine, nTen);
}
///////////////////////////////////////
// Icecore's atan2 ( http://www.java-gaming.org/topics/extremely-fast-atan2/36467/msg/346145/view.html#msg346145 )
///////////////////////////////////////
public static final class Icecore {
private static final int Size_Ac = 100000;
private static final int Size_Ar = Size_Ac + 1;
private static final float Pi = (float) Math.PI;
private static final float Pi_H = Pi / 2;
private static final float Atan2[] = new float[Size_Ar];
private static final float Atan2_PM[] = new float[Size_Ar];
private static final float Atan2_MP[] = new float[Size_Ar];
private static final float Atan2_MM[] = new float[Size_Ar];
private static final float Atan2_R[] = new float[Size_Ar];
private static final float Atan2_RPM[] = new float[Size_Ar];
private static final float Atan2_RMP[] = new float[Size_Ar];
private static final float Atan2_RMM[] = new float[Size_Ar];
static {
for (int i = 0; i <= Size_Ac; i++) {
double d = (double) i / Size_Ac;
double x = 1;
double y = x * d;
float v = (float) Math.atan2(y, x);
Atan2[i] = v;
Atan2_PM[i] = Pi - v;
Atan2_MP[i] = -v;
Atan2_MM[i] = -Pi + v;
Atan2_R[i] = Pi_H - v;
Atan2_RPM[i] = Pi_H + v;
Atan2_RMP[i] = -Pi_H + v;
Atan2_RMM[i] = -Pi_H - v;
}
}
public static final float atan2(float y, float x) {
if (y < 0) {
if (x < 0) {
//(y < x) because == (-y > -x)
if (y < x) {
return Atan2_RMM[(int) (x / y * Size_Ac)];
} else {
return Atan2_MM[(int) (y / x * Size_Ac)];
}
} else {
y = -y;
if (y > x) {
return Atan2_RMP[(int) (x / y * Size_Ac)];
} else {
return Atan2_MP[(int) (y / x * Size_Ac)];
}
}
} else {
if (x < 0) {
x = -x;
if (y > x) {
return Atan2_RPM[(int) (x / y * Size_Ac)];
} else {
return Atan2_PM[(int) (y / x * Size_Ac)];
}
} else {
if (y > x) {
return Atan2_R[(int) (x / y * Size_Ac)];
} else {
return Atan2[(int) (y / x * Size_Ac)];
}
}
}
}
}
@Benchmark
public double math_icecore() {
return Icecore.atan2(pThree, pFour) + Icecore.atan2(nTwo, pOne) + Icecore.atan2(nFour, nOne) + Icecore.atan2(pNine, nTen);
}
///////////////////////////////////////
// Apache's FastMath.atan2 ( http://commons.apache.org/proper/commons-math/apidocs/org/apache/commons/math3/util/FastMath.html )
///////////////////////////////////////
@Benchmark
public double math_apache() {
return FastMath.atan2(pThree, pFour) + FastMath.atan2(nTwo, pOne) + FastMath.atan2(nFour, nOne) + FastMath.atan2(pNine, nTen);
}
}
Accuracy
///////////////////////////////////////
// Accuracy
///////////////////////////////////////
public static void main(String[] args) {
int range = 1500;
double[] total = new double[7];
double[] largestError = new double[7];
int count = 0;
for (int y = -range; y <= range; y++) {
for (int x = -range; x <= range; x++) {
double result;
result = Math.abs(Math.atan2(y, x) - Math.atan2(y, x));
total[0] += result;
largestError[0] = result > largestError[0] ? result : largestError[0];
result = Math.abs(Math.atan2(y, x) - DSP.atan2_accurate(y, x));
total[1] += result;
largestError[1] = result > largestError[1] ? result : largestError[1];
result = Math.abs(Math.atan2(y, x) - DSP.atan2_fast(y, x));
total[2] += result;
largestError[2] = result > largestError[2] ? result : largestError[2];
result = Math.abs(Math.atan2(y, x) - Icecore.atan2(y, x));
total[3] += result;
largestError[3] = result > largestError[3] ? result : largestError[3];
result = Math.abs(Math.atan2(y, x) - Kappa.atan2(y, x));
total[4] += result;
largestError[4] = result > largestError[4] ? result : largestError[4];
result = Math.abs(Math.atan2(y, x) - Riven.atan2(y, x));
total[5] += result;
largestError[5] = result > largestError[5] ? result : largestError[5];
result = Math.abs(Math.atan2(y, x) - FastMath.atan2(y, x));
total[6] += result;
largestError[6] = result > largestError[6] ? result : largestError[6];
count++;
}
}
System.out.println(String.format("A lower average means higher accuracy. Results over %,d samples.", count));
System.out.println(String.format("apache_atan2 : Average Error %.5f / Largest Error %.5f", total[6] / count, largestError[6]));
System.out.println(String.format("default_atan2 : Average Error %.5f / Largest Error %.5f", total[0] / count, largestError[0]));
System.out.println(String.format("DSP.atan2_acc : Average Error %.5f / Largest Error %.5f", total[1] / count, largestError[1]));
System.out.println(String.format("DSP.atan2_fast : Average Error %.5f / Largest Error %.5f", total[2] / count, largestError[2]));
System.out.println(String.format("Icecore.atan2 : Average Error %.5f / Largest Error %.5f", total[3] / count, largestError[3]));
System.out.println(String.format("Kappa.atan2 : Average Error %.5f / Largest Error %.5f", total[4] / count, largestError[4]));
System.out.println(String.format("Riven.atan2 : Average Error %.5f / Largest Error %.5f", total[5] / count, largestError[5]));
/*
A lower average means higher accuracy. Results over 9,006,001 samples.
apache_atan2 : Average Error 0.00000 / Largest Error 0.00000
default_atan2 : Average Error 0.00000 / Largest Error 0.00000
DSP.atan2_acc : Average Error 0.00344 / Largest Error 1.57079
DSP.atan2_fast : Average Error 0.04318 / Largest Error 1.57080
Icecore.atan2 : Average Error 0.00000 / Largest Error 0.00001
Kappa.atan2 : Average Error 0.00231 / Largest Error 0.00488
Riven.atan2 : Average Error 0.00288 / Largest Error 0.00787
*/
}
Benchmark results
# JMH 1.9.3 (released 73 days ago)
# VM invoker: C:\Program Files\Java\jdk1.8.0_45\jre\bin\java.exe
# VM options: <none>
# Warmup: 5 iterations, 1 s each
# Measurement: 5 iterations, 1 s each
# Timeout: 10 min per iteration
# Threads: 1 thread, will synchronize iterations
# Benchmark mode: Average time, time/op
# Benchmark: com.gmail.mooman219.benchmark_test.ATAN2.math_apache
# Run progress: 0.00% complete, ETA 00:01:10
# Fork: 1 of 1
# Warmup Iteration 1: 374.275 ns/op
# Warmup Iteration 2: 372.820 ns/op
# Warmup Iteration 3: 369.142 ns/op
# Warmup Iteration 4: 368.814 ns/op
# Warmup Iteration 5: 367.294 ns/op
Iteration 1: 366.987 ns/op
Iteration 2: 371.812 ns/op
Iteration 3: 366.907 ns/op
Iteration 4: 366.877 ns/op
Iteration 5: 367.142 ns/op
Result "math_apache":
367.945 ñ(99.9%) 8.334 ns/op [Average]
(min, avg, max) = (366.877, 367.945, 371.812), stdev = 2.164
CI (99.9%): [359.611, 376.279] (assumes normal distribution)
# JMH 1.9.3 (released 73 days ago)
# VM invoker: C:\Program Files\Java\jdk1.8.0_45\jre\bin\java.exe
# VM options: <none>
# Warmup: 5 iterations, 1 s each
# Measurement: 5 iterations, 1 s each
# Timeout: 10 min per iteration
# Threads: 1 thread, will synchronize iterations
# Benchmark mode: Average time, time/op
# Benchmark: com.gmail.mooman219.benchmark_test.ATAN2.math_default
# Run progress: 14.29% complete, ETA 00:01:02
# Fork: 1 of 1
# Warmup Iteration 1: 590.515 ns/op
# Warmup Iteration 2: 585.678 ns/op
# Warmup Iteration 3: 578.451 ns/op
# Warmup Iteration 4: 577.610 ns/op
# Warmup Iteration 5: 578.103 ns/op
Iteration 1: 577.544 ns/op
Iteration 2: 585.795 ns/op
Iteration 3: 577.450 ns/op
Iteration 4: 577.399 ns/op
Iteration 5: 577.663 ns/op
Result "math_default":
579.170 ñ(99.9%) 14.266 ns/op [Average]
(min, avg, max) = (577.399, 579.170, 585.795), stdev = 3.705
CI (99.9%): [564.904, 593.437] (assumes normal distribution)
# JMH 1.9.3 (released 73 days ago)
# VM invoker: C:\Program Files\Java\jdk1.8.0_45\jre\bin\java.exe
# VM options: <none>
# Warmup: 5 iterations, 1 s each
# Measurement: 5 iterations, 1 s each
# Timeout: 10 min per iteration
# Threads: 1 thread, will synchronize iterations
# Benchmark mode: Average time, time/op
# Benchmark: com.gmail.mooman219.benchmark_test.ATAN2.math_dsp_accurate
# Run progress: 28.57% complete, ETA 00:00:51
# Fork: 1 of 1
# Warmup Iteration 1: 66.682 ns/op
# Warmup Iteration 2: 65.408 ns/op
# Warmup Iteration 3: 64.661 ns/op
# Warmup Iteration 4: 64.637 ns/op
# Warmup Iteration 5: 64.707 ns/op
Iteration 1: 64.645 ns/op
Iteration 2: 65.505 ns/op
Iteration 3: 64.629 ns/op
Iteration 4: 64.631 ns/op
Iteration 5: 64.652 ns/op
Result "math_dsp_accurate":
64.812 ñ(99.9%) 1.492 ns/op [Average]
(min, avg, max) = (64.629, 64.812, 65.505), stdev = 0.387
CI (99.9%): [63.320, 66.304] (assumes normal distribution)
# JMH 1.9.3 (released 73 days ago)
# VM invoker: C:\Program Files\Java\jdk1.8.0_45\jre\bin\java.exe
# VM options: <none>
# Warmup: 5 iterations, 1 s each
# Measurement: 5 iterations, 1 s each
# Timeout: 10 min per iteration
# Threads: 1 thread, will synchronize iterations
# Benchmark mode: Average time, time/op
# Benchmark: com.gmail.mooman219.benchmark_test.ATAN2.math_dsp_fast
# Run progress: 42.86% complete, ETA 00:00:41
# Fork: 1 of 1
# Warmup Iteration 1: 52.145 ns/op
# Warmup Iteration 2: 51.808 ns/op
# Warmup Iteration 3: 51.245 ns/op
# Warmup Iteration 4: 51.228 ns/op
# Warmup Iteration 5: 51.298 ns/op
Iteration 1: 51.269 ns/op
Iteration 2: 52.402 ns/op
Iteration 3: 51.282 ns/op
Iteration 4: 51.425 ns/op
Iteration 5: 52.078 ns/op
Result "math_dsp_fast":
51.691 ñ(99.9%) 1.993 ns/op [Average]
(min, avg, max) = (51.269, 51.691, 52.402), stdev = 0.518
CI (99.9%): [49.698, 53.684] (assumes normal distribution)
# JMH 1.9.3 (released 73 days ago)
# VM invoker: C:\Program Files\Java\jdk1.8.0_45\jre\bin\java.exe
# VM options: <none>
# Warmup: 5 iterations, 1 s each
# Measurement: 5 iterations, 1 s each
# Timeout: 10 min per iteration
# Threads: 1 thread, will synchronize iterations
# Benchmark mode: Average time, time/op
# Benchmark: com.gmail.mooman219.benchmark_test.ATAN2.math_icecore
# Run progress: 57.14% complete, ETA 00:00:31
# Fork: 1 of 1
# Warmup Iteration 1: 44.693 ns/op
# Warmup Iteration 2: 28.011 ns/op
# Warmup Iteration 3: 27.220 ns/op
# Warmup Iteration 4: 27.225 ns/op
# Warmup Iteration 5: 27.249 ns/op
Iteration 1: 27.204 ns/op
Iteration 2: 27.599 ns/op
Iteration 3: 27.220 ns/op
Iteration 4: 27.212 ns/op
Iteration 5: 27.221 ns/op
Result "math_icecore":
27.291 ñ(99.9%) 0.663 ns/op [Average]
(min, avg, max) = (27.204, 27.291, 27.599), stdev = 0.172
CI (99.9%): [26.628, 27.955] (assumes normal distribution)
# JMH 1.9.3 (released 73 days ago)
# VM invoker: C:\Program Files\Java\jdk1.8.0_45\jre\bin\java.exe
# VM options: <none>
# Warmup: 5 iterations, 1 s each
# Measurement: 5 iterations, 1 s each
# Timeout: 10 min per iteration
# Threads: 1 thread, will synchronize iterations
# Benchmark mode: Average time, time/op
# Benchmark: com.gmail.mooman219.benchmark_test.ATAN2.math_kappa
# Run progress: 71.43% complete, ETA 00:00:20
# Fork: 1 of 1
# Warmup Iteration 1: 40.813 ns/op
# Warmup Iteration 2: 40.480 ns/op
# Warmup Iteration 3: 38.620 ns/op
# Warmup Iteration 4: 38.945 ns/op
# Warmup Iteration 5: 38.653 ns/op
Iteration 1: 38.622 ns/op
Iteration 2: 39.381 ns/op
Iteration 3: 38.611 ns/op
Iteration 4: 38.615 ns/op
Iteration 5: 38.600 ns/op
Result "math_kappa":
38.766 ñ(99.9%) 1.325 ns/op [Average]
(min, avg, max) = (38.600, 38.766, 39.381), stdev = 0.344
CI (99.9%): [37.441, 40.090] (assumes normal distribution)
# JMH 1.9.3 (released 73 days ago)
# VM invoker: C:\Program Files\Java\jdk1.8.0_45\jre\bin\java.exe
# VM options: <none>
# Warmup: 5 iterations, 1 s each
# Measurement: 5 iterations, 1 s each
# Timeout: 10 min per iteration
# Threads: 1 thread, will synchronize iterations
# Benchmark mode: Average time, time/op
# Benchmark: com.gmail.mooman219.benchmark_test.ATAN2.math_riven
# Run progress: 85.71% complete, ETA 00:00:10
# Fork: 1 of 1
# Warmup Iteration 1: 45.780 ns/op
# Warmup Iteration 2: 40.609 ns/op
# Warmup Iteration 3: 36.467 ns/op
# Warmup Iteration 4: 36.467 ns/op
# Warmup Iteration 5: 36.502 ns/op
Iteration 1: 36.458 ns/op
Iteration 2: 37.000 ns/op
Iteration 3: 36.473 ns/op
Iteration 4: 36.435 ns/op
Iteration 5: 36.836 ns/op
Result "math_riven":
36.640 ñ(99.9%) 1.002 ns/op [Average]
(min, avg, max) = (36.435, 36.640, 37.000), stdev = 0.260
CI (99.9%): [35.638, 37.643] (assumes normal distribution)
# Run complete. Total time: 00:01:12
Benchmark Mode Cnt Score Error Units
ATAN2.math_apache avgt 5 367.945 ñ 8.334 ns/op
ATAN2.math_default avgt 5 579.170 ñ 14.266 ns/op
ATAN2.math_dsp_accurate avgt 5 64.812 ñ 1.492 ns/op
ATAN2.math_dsp_fast avgt 5 51.691 ñ 1.993 ns/op
ATAN2.math_icecore avgt 5 27.291 ñ 0.663 ns/op
ATAN2.math_kappa avgt 5 38.766 ñ 1.325 ns/op
ATAN2.math_riven avgt 5 36.640 ñ 1.002 ns/op
Keep in mind these are micro benchmarks and not indicative of real world performance; the JMH can only do so much to make sure the JIT doesn’t interfere. There may be a fair amount of branch prediction and caching that’s going on. For example, the lookup table in Riven’s atan2 implementation might just be sitting in a cpu cache.