Was curious about scalability/practicality of threading arithmetic over large arrays.
Put whatever you want in op() to measure your own system! Post results!
Also, tell me if I did anything dumb in the benchmark code; I want to make sure I’m doing it right. :point:
import java.util.*;
import java.util.concurrent.*;
/**
 * Micro-benchmark that measures how element-wise integer arithmetic over large
 * arrays scales with the number of worker threads.
 *
 * <p>For every thread count from 1 to the number of available processors, and for
 * array sizes 10^3 through 10^8, the arrays are split into contiguous slices (one
 * per thread), {@link #op(int, int)} is applied to every element pair, and the
 * wall-clock time of the parallel section is reported.
 *
 * <p>Note: the largest run allocates three {@code int} arrays of 10^8 elements
 * (roughly 1.2 GB in total), so the JVM heap must be large enough for that.
 */
public class ArrayThreadingTest {

    /** Value every element of the first operand array is filled with. */
    private static final int FILL_A = 56456810;

    /** Value every element of the second operand array is filled with. */
    private static final int FILL_B = 74779;

    /**
     * Runs the warm-up passes followed by the timed measurements and prints the
     * results to standard output.
     *
     * @param args ignored
     * @throws InterruptedException if the thread is interrupted while waiting for
     *         worker tasks or for a pool to terminate
     */
    public static void main(String[] args) throws InterruptedException {
        int numCores = Runtime.getRuntime().availableProcessors();
        System.out.println("Warming up... " + numCores + " cores detected");
        warmUp(numCores);

        System.out.println("Performing tests...\n");
        for (int numThreads = 1; numThreads <= numCores; numThreads++) {
            // One pool per thread count, started up front, so that thread creation
            // is never part of the timed section.
            ThreadPoolExecutor exec = newPrestartedPool(numThreads);
            try {
                for (int exponent = 3; exponent <= 8; exponent++) {
                    runTest(exec, numThreads, exponent);
                }
            } finally {
                // Always release the (non-daemon) worker threads, even on failure.
                exec.shutdownNow();
                exec.awaitTermination(1, TimeUnit.SECONDS);
            }
            System.out.println();
        }
        System.out.println("All tests finished, shutting down...");
    }

    /**
     * Executes the workload twice for sizes 10^3..10^7 without timing it, giving
     * the JIT a chance to compile the hot loop before measurements start.
     */
    private static void warmUp(int numThreads) throws InterruptedException {
        ThreadPoolExecutor exec = newPrestartedPool(numThreads);
        try {
            for (int pass = 0; pass < 2; pass++) {
                for (int exponent = 3; exponent <= 7; exponent++) {
                    int testSize = (int) Math.pow(10, exponent);
                    int[] a = filled(testSize, FILL_A);
                    int[] b = filled(testSize, FILL_B);
                    int[] c = new int[testSize];
                    reportFailures(exec.invokeAll(buildTasks(a, b, c, numThreads)));
                }
            }
        } finally {
            exec.shutdownNow();
            exec.awaitTermination(1, TimeUnit.SECONDS);
        }
    }

    /**
     * Times one parallel pass over arrays of 10^exponent elements, verifies the
     * output, and prints a one-line summary.
     */
    private static void runTest(ExecutorService exec, int numThreads, int exponent)
            throws InterruptedException {
        int testSize = (int) Math.pow(10, exponent);
        int[] a = filled(testSize, FILL_A);
        int[] b = filled(testSize, FILL_B);
        int[] c = new int[testSize];
        List<Callable<Void>> tasks = buildTasks(a, b, c, numThreads);

        long time = System.nanoTime();
        List<Future<Void>> results = exec.invokeAll(tasks);
        time = System.nanoTime() - time;

        // invokeAll never throws for a failed task; the failure is only visible
        // through the returned futures, so inspect them explicitly.
        reportFailures(results);

        int errors = 0;
        final int desiredResult = op(FILL_A, FILL_B);
        for (int value : c) {
            if (value != desiredResult) {
                errors++;
            }
        }
        if (errors > 0) {
            System.out.println("\t" + errors * 100. / testSize + "% errors!");
        }

        double nsPerOp = (double) time / testSize;
        double opsPerSecondPerThread = testSize / (time / 1000000000d) / numThreads;
        System.out.printf("Finished: %d Thread" + (numThreads > 1 ? "s" : "") + " performing %s operations at %.2f ns/op (%dK op/s/thread)\n", numThreads, 10 + "^" + exponent, nsPerOp, (int) (opsPerSecondPerThread / 1000));
    }

    /**
     * Creates one task per thread; task {@code t} writes {@code op(a[z], b[z])}
     * into {@code c[z]} for every index of its contiguous slice.
     */
    private static List<Callable<Void>> buildTasks(final int[] a, final int[] b, final int[] c,
            int numThreads) {
        List<Callable<Void>> tasks = new ArrayList<Callable<Void>>(numThreads);
        for (int t = 0; t < numThreads; t++) {
            final int from = chunkBound(t, c.length, numThreads);
            final int to = chunkBound(t + 1, c.length, numThreads);
            tasks.add(new Callable<Void>() {
                @Override
                public Void call() {
                    for (int z = from; z < to; z++) {
                        c[z] = op(a[z], b[z]);
                    }
                    return null;
                }
            });
        }
        return tasks;
    }

    /**
     * Returns the start index of slice {@code index} when {@code length} elements
     * are divided into {@code parts} contiguous slices; slice {@code index} covers
     * {@code [chunkBound(index), chunkBound(index + 1))}.
     *
     * <p>The product is computed in {@code long}: with 10^8 elements a plain
     * {@code int} multiplication overflows as soon as {@code index} reaches 22.
     *
     * @param index  slice number in {@code [0, parts]}
     * @param length total number of elements, non-negative
     * @param parts  number of slices, positive
     * @return the boundary index, in {@code [0, length]}
     */
    static int chunkBound(int index, int length, int parts) {
        return (int) ((long) index * length / parts);
    }

    /** Allocates an array of the given size with every element set to {@code value}. */
    private static int[] filled(int size, int value) {
        int[] array = new int[size];
        Arrays.fill(array, value);
        return array;
    }

    /**
     * Builds a fixed-size pool whose worker threads are already running, so the
     * first batch submitted to it does not pay for thread start-up.
     */
    private static ThreadPoolExecutor newPrestartedPool(int numThreads) {
        ThreadPoolExecutor pool = new ThreadPoolExecutor(numThreads, numThreads,
                0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>());
        pool.prestartAllCoreThreads();
        return pool;
    }

    /** Prints the cause of every task that terminated with an exception. */
    private static void reportFailures(List<Future<Void>> results) throws InterruptedException {
        for (Future<Void> result : results) {
            try {
                result.get();
            } catch (ExecutionException e) {
                System.err.println("\tWorker task failed: " + e.getCause());
            }
        }
    }

    //give your ALU a workout!
    //default: testing small loop optimization and integer arithmetic
    /**
     * The per-element workload being benchmarked. Replace the body to measure a
     * different operation; it must be deterministic, because the benchmark checks
     * every output element against {@code op(FILL_A, FILL_B)}.
     *
     * @param a element from the first operand array
     * @param b element from the second operand array
     * @return the value stored into the result array
     */
    private static int op(int a, int b) {
        int r = 0;
        for (int i = 1; i <= 10; i++)
            r += ((a + b) * (a / b + b + a) & i % b << a / b + a - 7 * b + (a - b + a * b ^ b & a)) % (a * 7 + b * (b / a + 5 ^ a * b) & b - a) * 8 + (i + 1) / a * a ^ b * a << b / b >> a;
        return r;
    }
}
Yields this on an AMD Phenom II X4 955 (3.4 GHz quad-core), Java 7: