checkError("on clEnqueueWriteBuffer", ret);
// Launch kernel
NativeSizeBuffer gWS = NativeSizeBuffer.allocateDirect(1).put(globalWorkSize).rewind();
NativeSizeBuffer lWS = NativeSizeBuffer.allocateDirect(1).put(localWorkSize).rewind();
ret = cl.clEnqueueNDRangeKernel(commandQueue, kernel, 1, null, gWS, lWS, 0, null, null);
checkError("on clEnqueueNDRangeKernel", ret);
// Synchronous/blocking read of results
ret = cl.clEnqueueReadBuffer(commandQueue, devDst, CL.CL_TRUE, 0, dest.capacity(), dest, 0, null, null);
checkError("on clEnqueueReadBuffer", ret);