OpenCL is one solution; I'll post the code as far as I got.
It saves a few ms (for me it took about 300-400 ms instead of 500).
It just makes everything much more complicated.
Base class:
// Bring up the native OpenCL bindings once, when this class is first loaded.
static {
    try {
        CL.create();
    } catch (LWJGLException initFailure) {
        // The failure is only reported; class loading continues regardless.
        initFailure.printStackTrace();
    }
}
// OpenCL handles shared with subclasses. context, queue and divice are assigned in create().
protected CLContext context;
protected CLCommandQueue queue;
// program and kernel are assigned in loadKernel().
protected CLProgram program;
protected CLKernel kernel;
// Device used for the command queue and the program build (name is misspelled but used throughout).
protected CLDevice divice;
// name: kernel function name handed to clCreateKernel; data: program source handed to clCreateProgramWithSource.
protected String name, data;
/**
 * Prepares the OpenCL state for a single kernel: creates the context and
 * command queue, lets the subclass allocate, then builds the program and
 * looks up the kernel.
 *
 * <p>{@link #allocate()} is invoked from this constructor, i.e. before the
 * subclass constructor body has run, so implementations must not depend on
 * subclass state that is initialised there.
 *
 * @param name name of the kernel function to look up in the program source
 * @param data OpenCL program source text
 * @throws IllegalStateException if {@link #create()} reports that the
 *         context or command queue could not be set up
 */
public Computing_base(String name, String data) {
    this.name = name;
    this.data = data;
    // Stop here instead of continuing with null handles into loadKernel().
    if (!create()) {
        throw new IllegalStateException("OpenCL setup failed for kernel '" + name + "'");
    }
    allocate();
    loadKernel();
}
/**
 * Creates the OpenCL context and a profiling-enabled command queue on the
 * first GPU device of the first platform.
 *
 * @return {@code true} when context, device and queue were set up;
 *         {@code false} when no platform or GPU device is available or the
 *         library reported an {@code LWJGLException}
 */
protected final boolean create() {
    try {
        // The lookups may yield no entries; check before indexing with get(0).
        List<CLPlatform> platforms = CLPlatform.getPlatforms();
        if (platforms == null || platforms.isEmpty()) {
            System.err.println("No OpenCL platform available");
            return false;
        }
        CLPlatform platform = platforms.get(0);

        List<CLDevice> devices = platform.getDevices(CL_DEVICE_TYPE_GPU);
        if (devices == null || devices.isEmpty()) {
            System.err.println("No OpenCL GPU device available on the first platform");
            return false;
        }

        context = CLContext.create(platform, devices, null, null, null);
        divice = devices.get(0);
        queue = clCreateCommandQueue(context, divice, CL_QUEUE_PROFILING_ENABLE, null);
    } catch (LWJGLException ex) {
        ex.printStackTrace();
        return false;
    }
    return true;
}
/**
 * Builds the program from the source text in {@code data} for the selected
 * device and looks up the kernel called {@code name}.
 */
protected final void loadKernel() {
    // Create the program object from the source string.
    program = clCreateProgramWithSource(context, data, null);

    // Build for the chosen device with no extra compiler options and hand the
    // status code to the library's error check.
    final int buildStatus = clBuildProgram(program, divice, "", null);
    Util.checkCLError(buildStatus);

    // `name` has to match a kernel function declared in the OpenCL source.
    kernel = clCreateKernel(program, name, null);
}
/**
 * Releases the kernel, program, command queue and context, in that order.
 *
 * <p>Handles that were never created are skipped, and each released handle
 * is cleared so that a second call does nothing.
 */
public void Dispose() {
    if (kernel != null) {
        clReleaseKernel(kernel);
        kernel = null;
    }
    if (program != null) {
        clReleaseProgram(program);
        program = null;
    }
    if (queue != null) {
        clReleaseCommandQueue(queue);
        queue = null;
    }
    if (context != null) {
        clReleaseContext(context);
        context = null;
    }
}
/**
 * Subclass hook called from the constructor after create() and before
 * loadKernel().
 */
protected abstract void allocate();
/**
 * Runs the subclass-defined computation.
 */
public abstract void excecute();
/**
 * Copies the given floats into a new buffer obtained from
 * {@code BufferUtils.createFloatBuffer}.
 *
 * @param floats values to copy
 * @return a buffer holding the values, rewound so its position is 0
 */
protected static FloatBuffer toFloatBuffer(float[] floats) {
    final FloatBuffer copy = BufferUtils.createFloatBuffer(floats.length);
    copy.put(floats);
    // put() advanced the position to the end; move it back to the start.
    copy.rewind();
    return copy;
}
/**
 * Writes every element of the buffer (indices 0 up to its capacity) to
 * standard output, each followed by a space, then ends the line.
 *
 * @param buffer buffer to dump; its position is not changed
 */
protected static void print(FloatBuffer buffer) {
    final int count = buffer.capacity();
    int index = 0;
    while (index < count) {
        System.out.print(buffer.get(index));
        System.out.print(" ");
        index++;
    }
    System.out.println();
}
My noise subclass (extends the base class):
public class Computing_noise extends Computing_base {
private FloatBuffer a, b, answer;
private CLMem aMem, bMem, answerMem;
public Computing_noise(){
super("perlin3d", FileManager.readFileAsString("D://noise2.txt"));
}
@Override
protected void allocate() {
}
public void setDimensions(int z, float step, int w, int h, int d){
a = toFloatBuffer(new float[]{ z, step, w, h, d });
aMem = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, a, null);
clEnqueueWriteBuffer(queue, aMem, 1, 0, a, null, null);
/* Prepare gradients. */
int gsize = (w + 1) * (h + 1) * (d + 1);
int size = (int) (w * h);
Random rng = new Random();
float[] vectors = new float[3 * gsize];
for (int i = 0; i < vectors.length; i++) {
vectors[i] = rng.nextFloat() * 2f - 1f;
}
b = toFloatBuffer(vectors);
answer = BufferUtils.createFloatBuffer(size);
/* Allocate memory */
bMem = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, b, null);
clEnqueueWriteBuffer(queue, bMem, 1, 0, b, null, null);
answerMem = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, answer, null);
clFinish(queue);
}
@Override
public void excecute() {
// execution
PointerBuffer kernel1DGlobalWorkSize = BufferUtils.createPointerBuffer(1);
kernel1DGlobalWorkSize.put(0, answer.capacity());
kernel.setArg(0, bMem);
kernel.setArg(1, aMem);
kernel.setArg(2, answerMem);
clEnqueueNDRangeKernel(queue, kernel, 1, null, kernel1DGlobalWorkSize, null, null, null);
// read the results back
clEnqueueReadBuffer(queue, answerMem, 1, 0, answer, null, null);
clFinish(queue);
// print(a);
// System.out.println("+");
// print(b);
// System.out.println("=");
//print(answer);
}
public float[] getResult(){
float[] s = new float[answer.capacity()];
answer.flip();
answer.get(s);
return s;
}
}
Noise code (noise2.txt):
/* Returns a pointer to the gradient vector for the given grid point.
 *
 * gradients: packed 3-float vectors, one per lattice point.
 * w, h:      area width and height; the host allocates
 *            (w + 1) * (h + 1) * (d + 1) vectors, so consecutive rows are
 *            (w + 1) vectors apart and consecutive z-slices
 *            (w + 1) * (h + 1) vectors apart.
 * c:         lattice coordinates {x, y, z} of the point (whole numbers).
 *
 * The index is computed in integers so it stays exact for large lattices.
 */
global const float *get_gradient(global const float *gradients,
const float w, const float h, const float *c)
{
    const int row = (int)w + 1;
    const int slice = row * ((int)h + 1);
    const int base = ((int)c[0] + (int)c[1] * row + (int)c[2] * slice) * 3;
    return gradients + base;
}
/* Dot product of the gradient g with the offset from corner c to point p. */
float calc_magnitude(global const float *g, const float *c, const float *p)
{
    const float dx = p[0] - c[0];
    const float dy = p[1] - c[1];
    const float dz = p[2] - c[2];
    return g[0] * dx + g[1] * dy + g[2] * dz;
}
/* Blend weight of corner c for point p: per axis t = 1 - |c - p| is mapped
 * through 3t^2 - 2t^3, and the three per-axis results are multiplied. */
float weight(const float *c, const float *p)
{
    float fade[3];
    for (int axis = 0; axis < 3; ++axis) {
        const float t = 1 - fabs(c[axis] - p[axis]);
        fade[axis] = 3 * pown(t, 2) - 2 * pown(t, 3);
    }
    return fade[0] * fade[1] * fade[2];
}
/* Computes one noise value per work item and stores it in value[id].
 *
 * gradients: packed 3-float gradient vectors, looked up via get_gradient().
 * params:    float array; only elements 2, 3 and 4 (width, height, depth)
 *            are read here. The host stores z and step in elements 0 and 1,
 *            which this kernel never reads.
 * value:     output, one float per work item.
 *
 * The result is the sum over the eight surrounding lattice corners of
 * weight(corner, p) * calc_magnitude(gradient(corner), corner, p).
 */
kernel void
perlin3d(global const float *gradients,
global const float *params,
global float *value)
{
/* Fetch and calculate parameters. */
unsigned int id = get_global_id(0);
float w = params[2]; // area width (x)
float h = params[3]; // area height (y)
float d = params[4]; // area depth (z)
/* The 1-D work-item id is unfolded into a column (x) and a row (y). */
float x = fmod(id, w); // x-position to sample
float y = floor(id / w); // y-position to sample
float z = d; // z-position to sample
/* NOTE(review): for whole-number w and d, x, y and z are whole numbers too,
 * so p coincides with corner c000; weight() is then 1 for c000 and 0 for the
 * other corners while calc_magnitude() for c000 is 0, which looks like every
 * output becomes 0. Presumably params[0] (z) and params[1] (step) were meant
 * to position the sample - confirm the intended sampling. */
/* NOTE(review): with z = d the "+1" corners use z-index d + 1, while the
 * host allocates lattice planes for (d + 1) z-values only - verify that the
 * gradient lookups stay inside the buffer. */
const float p[] = {x, y, z};
/* Calculate grid corners. */
/* Naming: cXYZ is the base corner c000 offset by X, Y and Z (each 0 or 1). */
const float c000[] = {floor(x), floor(y), floor(z)};
const float c001[] = {c000[0] + 0, c000[1] + 0, c000[2] + 1};
const float c010[] = {c000[0] + 0, c000[1] + 1, c000[2] + 0};
const float c011[] = {c000[0] + 0, c000[1] + 1, c000[2] + 1};
const float c100[] = {c000[0] + 1, c000[1] + 0, c000[2] + 0};
const float c101[] = {c000[0] + 1, c000[1] + 0, c000[2] + 1};
const float c110[] = {c000[0] + 1, c000[1] + 1, c000[2] + 0};
const float c111[] = {c000[0] + 1, c000[1] + 1, c000[2] + 1};
/* Find each of the grid gradients. */
global const float *g000 = get_gradient(gradients, w, h, c000);
global const float *g001 = get_gradient(gradients, w, h, c001);
global const float *g010 = get_gradient(gradients, w, h, c010);
global const float *g011 = get_gradient(gradients, w, h, c011);
global const float *g100 = get_gradient(gradients, w, h, c100);
global const float *g101 = get_gradient(gradients, w, h, c101);
global const float *g110 = get_gradient(gradients, w, h, c110);
global const float *g111 = get_gradient(gradients, w, h, c111);
/* Dot products. */
/* Each m is the corner gradient dotted with the offset from that corner to p. */
float m000 = calc_magnitude(g000, c000, p);
float m001 = calc_magnitude(g001, c001, p);
float m010 = calc_magnitude(g010, c010, p);
float m011 = calc_magnitude(g011, c011, p);
float m100 = calc_magnitude(g100, c100, p);
float m101 = calc_magnitude(g101, c101, p);
float m110 = calc_magnitude(g110, c110, p);
float m111 = calc_magnitude(g111, c111, p);
/* Weights. */
float w000 = weight(c000, p);
float w001 = weight(c001, p);
float w010 = weight(c010, p);
float w011 = weight(c011, p);
float w100 = weight(c100, p);
float w101 = weight(c101, p);
float w110 = weight(c110, p);
float w111 = weight(c111, p);
/* Weighted sum of the eight corner contributions. */
value[id] =
w000 * m000
+ w001 * m001
+ w010 * m010
+ w011 * m011
+ w100 * m100
+ w101 * m101
+ w110 * m110
+ w111 * m111;
}