// Source Code of com.jogamp.opencl.demos.bitonicsort.BitonicSort

/*
 * 18:42 Saturday, February 27 2010
 */
package com.jogamp.opencl.demos.bitonicsort;


import com.jogamp.opencl.CLBuffer;
import com.jogamp.opencl.CLCommandQueue;
import com.jogamp.opencl.CLContext;
import com.jogamp.opencl.CLDevice;
import com.jogamp.opencl.CLKernel;
import com.jogamp.opencl.CLProgram;
import java.io.IOException;
import java.nio.IntBuffer;
import java.util.Map;
import java.util.Random;


import static java.lang.System.*;
import static com.jogamp.opencl.CLMemory.Mem.*;
import static com.jogamp.opencl.CLProgram.*;


/**
 * Bitonic sort optimized for GPUs.
 * Uses NVIDIA's bitonic merge sort kernel.
 * @author Michael Bien
 */
public class BitonicSort {


    private static final String BITONIC_MERGE_GLOBAL = "bitonicMergeGlobal";
    private static final String BITONIC_MERGE_LOCAL  = "bitonicMergeLocal";
    private static final String BITONIC_SORT_LOCAL   = "bitonicSortLocal";
    private static final String BITONIC_SORT_LOCAL1  = "bitonicSortLocal1";


    private final static int LOCAL_SIZE_LIMIT = 1024;
    private final Map<String, CLKernel> kernels;


    public BitonicSort() throws IOException {


        final int sortDir  = 1;
        final int elements = 1048576;
        final int maxvalue = 1000000;


        out.println("Initializing OpenCL...");


        //Create the context
        CLContext context = null;


        try{


            context = CLContext.create();
            CLCommandQueue queue = context.getMaxFlopsDevice().createCommandQueue();


            out.println("Initializing OpenCL bitonic sorter...");
            kernels = initBitonicSort(queue);


            out.println("Creating OpenCL memory objects...");
            CLBuffer<IntBuffer> keyBuffer = context.createIntBuffer(elements, READ_ONLY, USE_BUFFER);
            System.out.println(keyBuffer.getCLSize()/1000000.0f);


            out.println("Initializing data...\n");
            Random random = new Random();
            for (int i = 0; i < elements; i++) {
                int rnd = random.nextInt(maxvalue);
                keyBuffer.getBuffer().put(i, rnd);
            }


            int arrayLength = elements;
            int batch = elements / arrayLength;


            out.printf("Test array length %d (%d arrays in the batch)...\n", arrayLength, batch);


            long time = currentTimeMillis();


            bitonicSort(queue, keyBuffer, keyBuffer, batch, arrayLength, sortDir);
            queue.putReadBuffer(keyBuffer, true);


            out.println(currentTimeMillis() - time+"ms");


            IntBuffer keys = keyBuffer.getBuffer();
            printSnapshot(keys, 20);
            checkIfSorted(keys);


            out.println("\nTEST PASSED");
        
        }finally{
            if(context!=null) {
                context.release();
            }
        }


    }
    
    private Map<String, CLKernel> initBitonicSort(CLCommandQueue queue) throws IOException {


        out.println("    creating bitonic sort program");


        CLContext context = queue.getContext();


        CLProgram program = context.createProgram(getClass().getResourceAsStream("BitonicSort.cl"))
                                   .build(define("LOCAL_SIZE_LIMIT", LOCAL_SIZE_LIMIT));


        Map<String, CLKernel> kernelMap = program.createCLKernels();


        out.println("    checking minimum supported workgroup size");
        //Check for work group size
        CLDevice device = queue.getDevice();
        long szBitonicSortLocal  = kernelMap.get(BITONIC_SORT_LOCAL).getWorkGroupSize(device);
        long szBitonicSortLocal1 = kernelMap.get(BITONIC_SORT_LOCAL1).getWorkGroupSize(device);
        long szBitonicMergeLocal = kernelMap.get(BITONIC_MERGE_LOCAL).getWorkGroupSize(device);


        if (    (szBitonicSortLocal < (LOCAL_SIZE_LIMIT / 2))
             || (szBitonicSortLocal1 < (LOCAL_SIZE_LIMIT / 2))
             || (szBitonicMergeLocal < (LOCAL_SIZE_LIMIT / 2))  ) {
            throw new RuntimeException("Minimum work-group size "+LOCAL_SIZE_LIMIT/2
                    +" required by this application is not supported on this device.");
        }


        return kernelMap;


    }


    public void bitonicSort(CLCommandQueue queue, CLBuffer<?> dstKey, CLBuffer<?> srcKey, int batch, int arrayLength, int dir) {


        if (arrayLength < 2) {
            throw new IllegalArgumentException("arrayLength was "+arrayLength);
        }


        // TODO Only power-of-two array lengths are supported so far


        dir = (dir != 0) ? 1 : 0;


        CLKernel sortlocal1  = kernels.get(BITONIC_SORT_LOCAL1);
        CLKernel sortlocal   = kernels.get(BITONIC_SORT_LOCAL);
        CLKernel mergeGlobal = kernels.get(BITONIC_MERGE_GLOBAL);
        CLKernel mergeLocal  = kernels.get(BITONIC_MERGE_LOCAL);


        if (arrayLength <= LOCAL_SIZE_LIMIT) {


            //        oclCheckError( (batch * arrayLength) % LOCAL_SIZE_LIMIT == 0, shrTRUE );


            //Launch bitonicSortLocal
            sortlocal.putArgs(dstKey, srcKey)
                     .putArg(arrayLength).putArg(dir).rewind();


            int localWorkSize = LOCAL_SIZE_LIMIT / 2;
            int globalWorkSize = batch * arrayLength / 2;
            queue.put1DRangeKernel(sortlocal, 0, globalWorkSize, localWorkSize);


        } else {


            //Launch bitonicSortLocal1
            sortlocal1.setArgs(dstKey, srcKey);


            int localWorkSize = LOCAL_SIZE_LIMIT / 2;
            int globalWorkSize = batch * arrayLength / 2;


            queue.put1DRangeKernel(sortlocal1, 0, globalWorkSize, localWorkSize);


            for (int size = 2 * LOCAL_SIZE_LIMIT; size <= arrayLength; size <<= 1) {
                for (int stride = size / 2; stride > 0; stride >>= 1) {
                    if (stride >= LOCAL_SIZE_LIMIT) {
                        //Launch bitonicMergeGlobal
                        mergeGlobal.putArgs(dstKey, dstKey)
                                   .putArg(arrayLength).putArg(size).putArg(stride).putArg(dir).rewind();


                        globalWorkSize = batch * arrayLength / 2;
                        queue.put1DRangeKernel(mergeGlobal, 0, globalWorkSize, 0);
                    } else {
                        //Launch bitonicMergeLocal
                        mergeLocal.putArgs(dstKey, dstKey)
                                  .putArg(arrayLength).putArg(stride).putArg(size).putArg(dir).rewind();


                        localWorkSize = LOCAL_SIZE_LIMIT / 2;
                        globalWorkSize = batch * arrayLength / 2;


                        queue.put1DRangeKernel(mergeLocal, 0, globalWorkSize, localWorkSize);
                        break;
                    }
                }
            }
        }
    }


    private void printSnapshot(IntBuffer buffer, int snapshot) {
        for(int i = 0; i < snapshot; i++)
            out.print(buffer.get() + ", ");
        out.println("...; " + buffer.remaining() + " more");
        buffer.rewind();
    }


    private void checkIfSorted(IntBuffer keys) {
        for (int i = 1; i < keys.capacity(); i++) {
            if (keys.get(i - 1) > keys.get(i)) {
                throw new RuntimeException("not sorted "+ keys.get(i - 1) +"!> "+ keys.get(i));
            }
        }
    }


    public static void main(String[] args) throws IOException {
        new BitonicSort();
    }
}
// Source Code of com.jogamp.opencl.demos.bitonicsort.BitonicSort

// Related Classes of com.jogamp.opencl.demos.bitonicsort.BitonicSort