Background:
Back at it with some Atan2 benchmarks. I found the source for my old benchmarks and wanted to run this again with some different settings. The purpose of this post is to find a faster way to do Math.atan2(y,x). To compare various methods, I setup a simple JMH benchmark and accuracy test. Criticisms and new atan2 submissions are appreciated.
[u]Outcome:[/u] Fastest implementation with lookup table: [b]Icecore[/b] Fastest implementation without lookup table: [b]Diamond[/b]
Note: Fastest doesn’t mean most accurate. Please take into account the average and largest error for each algorithm.
[u]Relevant Data:[/u]
A lower average means higher accuracy.
Apache : Average Error 0.00000 / Largest Error 0.00000 : 36,012,001 samples : 1488.165 ± 7.285 ms/op
Default : Average Error 0.00000 / Largest Error 0.00000 : 36,012,001 samples : 1773.411 ± 193.228 ms/op
Diamond : Average Error 0.04318 / Largest Error 0.07112 : 36,012,001 samples : 47.299 ± 1.467 ms/op
DSPAccurate : Average Error 0.00344 / Largest Error 0.01015 : 36,012,001 samples : 72.539 ± 3.702 ms/op
DSPFast : Average Error 0.04318 / Largest Error 0.07111 : 36,012,001 samples : 56.547 ± 0.662 ms/op
Icecore : Average Error 0.00038 / Largest Error 0.00098 : 36,012,001 samples : 58.265 ± 0.686 ms/op
Kappa : Average Error 0.00224 / Largest Error 0.00468 : 36,012,001 samples : 67.622 ± 21.004 ms/op
Riven : Average Error 0.00291 / Largest Error 0.00787 : 36,012,001 samples : 99.701 ± 2.846 ms/op
Baseline : : 36,012,001 samples : 36.155 ± 4.251 ms/op
Note: Keep in mind these are micro benchmarks and not indicative of real world performance; the JMH can only do so much to make sure the JIT doesn’t interfere. There may be a fair amount of branch prediction and caching that’s going on, and lookup tables might just be sitting in a cpu cache.
[u]Source:[/u]
public class Atan2 {
///////////////////////////////////////
// Benchmark Settings
///////////////////////////////////////
private static final double LOW_D = -3;
private static final double HIGH_D = 3;
private static final double INC_D = 0.001D;
private static final float LOW_F = (float) LOW_D;
private static final float HIGH_F = (float) HIGH_D;
private static final float INC_F = (float) INC_D;
public static Options buildOptions() {
return new OptionsBuilder()
.include(Atan2.class.getSimpleName())
.warmupTime(new TimeValue(2, TimeUnit.SECONDS))
.warmupIterations(4)
.measurementTime(new TimeValue(2, TimeUnit.SECONDS))
.measurementIterations(4)
.forks(1)
.mode(Mode.AverageTime)
.timeUnit(TimeUnit.MILLISECONDS)
.threads(1)
.build();
}
///////////////////////////////////////
// Accuracy
///////////////////////////////////////
public static void printError() {
Atan2Benchmark[] benchmarks = new Atan2Benchmark[]{
new Apache(),
new Default(),
new Diamond(),
new DSPAccurate(),
new DSPFast(),
new Icecore(),
new Kappa(),
new Riven(),};
System.out.println(String.format("A lower average means higher accuracy."));
for (Atan2Benchmark benchmark : benchmarks) {
benchmark.printError(LOW_D, HIGH_D, INC_D);
}
}
///////////////////////////////////////
// Baseline
///////////////////////////////////////
@Benchmark
public double baseline() {
double sum = 0;
for (double x = LOW_D; x < HIGH_D; x += INC_D) {
for (double y = LOW_D; y < HIGH_D; y += INC_D) {
sum += x + y;
}
}
return sum;
}
///////////////////////////////////////
// Default atan2
///////////////////////////////////////
public static final class Default extends Atan2Benchmark {
public Default() {
super("Default");
}
@Override
public float test(float x, float y) {
return (float) Math.atan2(x, y);
}
}
@Benchmark
public double atan2_default() {
double sum = 0;
for (double x = LOW_D; x < HIGH_D; x += INC_D) {
for (double y = LOW_D; y < HIGH_D; y += INC_D) {
sum += Math.atan2(x, y);
}
}
return sum;
}
///////////////////////////////////////
// Apache's FastMath.atan2 ( http://commons.apache.org/proper/commons-math/javadocs/api-3.3/org/apache/commons/math3/util/FastMath.html )
///////////////////////////////////////
public static final class Apache extends Atan2Benchmark {
public Apache() {
super("Apache");
}
@Override
public float test(float x, float y) {
return (float) FastMath.atan2(x, y);
}
}
@Benchmark
public double atan2_apache() {
double sum = 0;
for (double x = LOW_D; x < HIGH_D; x += INC_D) {
for (double y = LOW_D; y < HIGH_D; y += INC_D) {
sum += FastMath.atan2(x, y);
}
}
return sum;
}
///////////////////////////////////////
// Riven's atan2 ( http://www.java-gaming.org/index.php?topic=14647.0 )
///////////////////////////////////////
public static final class Riven extends Atan2Benchmark {
public Riven() {
super("Riven");
}
@Override
public float test(float x, float y) {
return Riven.atan2(x, y);
}
private static final int ATAN2_BITS = 7;
private static final int ATAN2_BITS2 = ATAN2_BITS << 1;
private static final int ATAN2_MASK = ~(-1 << ATAN2_BITS2);
private static final int ATAN2_COUNT = ATAN2_MASK + 1;
private static final int ATAN2_DIM = (int) Math.sqrt(ATAN2_COUNT);
private static final float INV_ATAN2_DIM_MINUS_1 = 1.0f / (ATAN2_DIM - 1);
private static final float[] atan2 = new float[ATAN2_COUNT];
static {
for (int i = 0; i < ATAN2_DIM; i++) {
for (int j = 0; j < ATAN2_DIM; j++) {
float x0 = (float) i / ATAN2_DIM;
float y0 = (float) j / ATAN2_DIM;
atan2[j * ATAN2_DIM + i] = (float) Math.atan2(y0, x0);
}
}
}
public static final float atan2(float y, float x) {
float add, mul;
if (x < 0.0f) {
if (y < 0.0f) {
x = -x;
y = -y;
mul = 1.0f;
} else {
x = -x;
mul = -1.0f;
}
add = -3.141592653f;
} else {
if (y < 0.0f) {
y = -y;
mul = -1.0f;
} else {
mul = 1.0f;
}
add = 0.0f;
}
float invDiv = 1.0f / (((x < y) ? y : x) * INV_ATAN2_DIM_MINUS_1);
int xi = (int) (x * invDiv);
int yi = (int) (y * invDiv);
return (atan2[yi * ATAN2_DIM + xi] + add) * mul;
}
}
@Benchmark
public float atan2_riven() {
float sum = 0;
for (float x = LOW_F; x < HIGH_F; x += INC_F) {
for (float y = LOW_F; y < HIGH_F; y += INC_F) {
sum += Riven.atan2(x, y);
}
}
return sum;
}
///////////////////////////////////////
// DSP's atan2 ( http://dspguru.com/dsp/tricks/fixed-point-atan2-with-self-normalization )
///////////////////////////////////////
public static final class DSPFast extends Atan2Benchmark {
public DSPFast() {
super("DSPFast");
}
@Override
public float test(float x, float y) {
return DSPFast.atan2(x, y);
}
private static final float PI_2 = 1.5707963F; // Math.PI / 2
private static final float PI_4 = 0.7853982F; // Math.PI / 4
private static final float PI_3_4 = 2.3561945F; // (Math.PI / 4) * 3
private static final float MINUS_PI_2 = -1.5707963F; // Math.PI / -2
public static final float atan2(float y, float x) {
float r;
float abs_y = Float.intBitsToFloat(Float.floatToRawIntBits(y) << 1 >>> 1);
if (x == 0.0F) {
if (y > 0.0F) {
return PI_2;
}
if (y == 0.0F) {
return 0.0f;
}
return MINUS_PI_2;
} else if (x > 0) {
r = (x - abs_y) / (x + abs_y);
r = PI_4 - PI_4 * r;
} else {
r = (x + abs_y) / (abs_y - x);
r = PI_3_4 - PI_4 * r;
}
return y < 0 ? -r : r;
}
}
public static final class DSPAccurate extends Atan2Benchmark {
public DSPAccurate() {
super("DSPAccurate");
}
@Override
public float test(float x, float y) {
return DSPAccurate.atan2(x, y);
}
private static final float PI_2 = 1.5707963F; // Math.PI / 2
private static final float PI_4 = 0.7853982F; // Math.PI / 4
private static final float PI_3_4 = 2.3561945F; // (Math.PI / 4) * 3
private static final float MINUS_PI_2 = -1.5707963F; // Math.PI / -2
public static final float atan2(float y, float x) {
float r;
float c;
float abs_y = Float.intBitsToFloat(Float.floatToRawIntBits(y) << 1 >>> 1);
if (x == 0.0F) {
if (y > 0.0F) {
return PI_2;
}
if (y == 0.0F) {
return 0.0f;
}
return MINUS_PI_2;
} else if (x > 0) {
r = (x - abs_y) / (x + abs_y);
c = PI_4;
} else {
r = (x + abs_y) / (abs_y - x);
c = PI_3_4;
}
r = 0.1963F * r * r * r - 0.9817F * r + c;
return y < 0 ? -r : r;
}
}
@Benchmark
public float atan2_dsp_fast() {
float sum = 0;
for (float x = LOW_F; x < HIGH_F; x += INC_F) {
for (float y = LOW_F; y < HIGH_F; y += INC_F) {
sum += DSPFast.atan2(x, y);
}
}
return sum;
}
@Benchmark
public float atan2_dsp_accurate() {
float sum = 0;
for (float x = LOW_F; x < HIGH_F; x += INC_F) {
for (float y = LOW_F; y < HIGH_F; y += INC_F) {
sum += DSPAccurate.atan2(x, y);
}
}
return sum;
}
///////////////////////////////////////
// kappa's atan2 ( http://www.java-gaming.org/topics/extremely-fast-atan2/36467/msg/346112/view.html#msg346112 )
///////////////////////////////////////
public static final class Kappa extends Atan2Benchmark {
public Kappa() {
super("Kappa");
}
@Override
public float test(float x, float y) {
return Kappa.atan2(x, y);
}
private static final float PI = 3.1415927f;
private static final float PI_2 = PI / 2f;
private static final float MINUS_PI_2 = -PI_2;
private static final float C_M = 0.2808722f;
private static final float C_A = 0.2808722f;
public static final float atan2(float y, float x) {
if (x == 0.0f) {
if (y > 0.0f) {
return PI_2;
}
if (y == 0.0f) {
return 0.0f;
}
return MINUS_PI_2;
}
final float atan;
final float z = y / x;
if (Math.abs(z) < 1.0f) {
atan = z / (z * z * C_M + 1.0f);
if (x < 0.0f) {
return (y < 0.0f) ? atan - PI : atan + PI;
}
return atan;
} else {
atan = PI_2 - z / (z * z + C_A);
return (y < 0.0f) ? atan - PI : atan;
}
}
}
@Benchmark
public float atan2_kappa() {
float sum = 0;
for (float x = LOW_F; x < HIGH_F; x += INC_F) {
for (float y = LOW_F; y < HIGH_F; y += INC_F) {
sum += Kappa.atan2(x, y);
}
}
return sum;
}
///////////////////////////////////////
// Icecore's atan2 ( http://www.java-gaming.org/topics/extremely-fast-atan2/36467/msg/346145/view.html#msg346145 )
///////////////////////////////////////
public static final class Icecore extends Atan2Benchmark {
public Icecore() {
super("Icecore");
}
@Override
public float test(float x, float y) {
return Icecore.atan2(x, y);
}
private static final float PI = (float) Math.PI;
private static final float PI_2 = PI / 2;
private static final float PI_NEG_2 = -PI_2;
private static final int SIZE = 1024;
private static final float ATAN2[];
static {
final int Size_Ar = SIZE + 1;
ATAN2 = new float[Size_Ar];
for (int i = 0; i <= SIZE; i++) {
double d = (double) i / SIZE;
double x = 1;
double y = x * d;
ATAN2[i] = (float) Math.atan2(y, x);
}
}
public static final float atan2(float y, float x) {
if (y < 0) {
if (x < 0) {
// (y < x) because == (-y > -x)
if (y < x) {
return PI_NEG_2 - ATAN2[(int) (x / y * SIZE)];
} else {
return ATAN2[(int) (y / x * SIZE)] - PI;
}
} else {
y = -y;
if (y > x) {
return ATAN2[(int) (x / y * SIZE)] - PI_2;
} else {
return -ATAN2[(int) (y / x * SIZE)];
}
}
} else {
if (x < 0) {
x = -x;
if (y > x) {
return PI_2 + ATAN2[(int) (x / y * SIZE)];
} else {
return PI - ATAN2[(int) (y / x * SIZE)];
}
} else {
if (y > x) {
return PI_2 - ATAN2[(int) (x / y * SIZE)];
} else {
return ATAN2[(int) (y / x * SIZE)];
}
}
}
}
}
@Benchmark
public float atan2_icecore() {
float sum = 0;
for (float x = LOW_F; x < HIGH_F; x += INC_F) {
for (float y = LOW_F; y < HIGH_F; y += INC_F) {
sum += Icecore.atan2(x, y);
}
}
return sum;
}
///////////////////////////////////////
// Diamond's atan2 ( https://stackoverflow.com/questions/1427422/cheap-algorithm-to-find-measure-of-angle-between-vectors/14675998#14675998 )
///////////////////////////////////////
public static final class Diamond extends Atan2Benchmark {
public Diamond() {
super("Diamond");
}
@Override
public float test(float x, float y) {
return Diamond.atan2(x, y);
}
private static final float PI_2 = 1.5707963F; // Math.PI / 2
public static final float atan2(float y, float x) {
float angle;
if (y == 0f && x >= 0f) {
return 0;
} else if (y >= 0f) {
if (x >= 0f) {
angle = y / (x + y);
} else {
angle = 1f - x / (-x + y);
}
} else {
if (x < 0f) {
angle = -2f + y / (x + y);
} else {
angle = -1f + x / (x - y);
}
}
return angle * PI_2;
}
}
@Benchmark
public float atan2_diamond() {
float sum = 0;
for (float x = LOW_F; x < HIGH_F; x += INC_F) {
for (float y = LOW_F; y < HIGH_F; y += INC_F) {
sum += Diamond.atan2(x, y);
}
}
return sum;
}
}
[u]Updates:[/u] 1: Added baseline benchmark. Cleaned up DSPFast/DSPAccurate implementation to look more sane with no performance loss. 2: Improved Kappa's accuracy with no performance loss. Updated Icecore's submission. Retested all algorithms. 3: Added the Diamond implementation.