Package com.jogamp.opencl.demos.bitonicsort

Source Code of com.jogamp.opencl.demos.bitonicsort.BitonicSort

/*
* 18:42 Saturday, February 27 2010
*/
package com.jogamp.opencl.demos.bitonicsort;

import com.jogamp.opencl.CLBuffer;
import com.jogamp.opencl.CLCommandQueue;
import com.jogamp.opencl.CLContext;
import com.jogamp.opencl.CLDevice;
import com.jogamp.opencl.CLKernel;
import com.jogamp.opencl.CLProgram;
import java.io.IOException;
import java.nio.IntBuffer;
import java.util.Map;
import java.util.Random;

import static java.lang.System.*;
import static com.jogamp.opencl.CLMemory.Mem.*;
import static com.jogamp.opencl.CLProgram.*;

/**
* Bitonic sort optimized for GPUs.
* Uses NVIDIA's bitonic merge sort kernel.
* @author Michael Bien
*/
public class BitonicSort {

    private static final String BITONIC_MERGE_GLOBAL = "bitonicMergeGlobal";
    private static final String BITONIC_MERGE_LOCAL  = "bitonicMergeLocal";
    private static final String BITONIC_SORT_LOCAL   = "bitonicSortLocal";
    private static final String BITONIC_SORT_LOCAL1  = "bitonicSortLocal1";

    private final static int LOCAL_SIZE_LIMIT = 1024;
    private final Map<String, CLKernel> kernels;

    public BitonicSort() throws IOException {

        final int sortDir  = 1;
        final int elements = 1048576;
        final int maxvalue = 1000000;

        out.println("Initializing OpenCL...");

        //Create the context
        CLContext context = null;

        try{

            context = CLContext.create();
            CLCommandQueue queue = context.getMaxFlopsDevice().createCommandQueue();

            out.println("Initializing OpenCL bitonic sorter...");
            kernels = initBitonicSort(queue);

            out.println("Creating OpenCL memory objects...");
            CLBuffer<IntBuffer> keyBuffer = context.createIntBuffer(elements, READ_ONLY, USE_BUFFER);
            System.out.println(keyBuffer.getCLSize()/1000000.0f);

            out.println("Initializing data...\n");
            Random random = new Random();
            for (int i = 0; i < elements; i++) {
                int rnd = random.nextInt(maxvalue);
                keyBuffer.getBuffer().put(i, rnd);
            }

            int arrayLength = elements;
            int batch = elements / arrayLength;

            out.printf("Test array length %d (%d arrays in the batch)...\n", arrayLength, batch);

            long time = currentTimeMillis();

            bitonicSort(queue, keyBuffer, keyBuffer, batch, arrayLength, sortDir);
            queue.putReadBuffer(keyBuffer, true);

            out.println(currentTimeMillis() - time+"ms");

            IntBuffer keys = keyBuffer.getBuffer();
            printSnapshot(keys, 20);
            checkIfSorted(keys);

            out.println("\nTEST PASSED");
       
        }finally{
            if(context!=null) {
                context.release();
            }
        }

    }
   
    private Map<String, CLKernel> initBitonicSort(CLCommandQueue queue) throws IOException {

        out.println("    creating bitonic sort program");

        CLContext context = queue.getContext();

        CLProgram program = context.createProgram(getClass().getResourceAsStream("BitonicSort.cl"))
                                   .build(define("LOCAL_SIZE_LIMIT", LOCAL_SIZE_LIMIT));

        Map<String, CLKernel> kernelMap = program.createCLKernels();

        out.println("    checking minimum supported workgroup size");
        //Check for work group size
        CLDevice device = queue.getDevice();
        long szBitonicSortLocal  = kernelMap.get(BITONIC_SORT_LOCAL).getWorkGroupSize(device);
        long szBitonicSortLocal1 = kernelMap.get(BITONIC_SORT_LOCAL1).getWorkGroupSize(device);
        long szBitonicMergeLocal = kernelMap.get(BITONIC_MERGE_LOCAL).getWorkGroupSize(device);

        if (    (szBitonicSortLocal < (LOCAL_SIZE_LIMIT / 2))
             || (szBitonicSortLocal1 < (LOCAL_SIZE_LIMIT / 2))
             || (szBitonicMergeLocal < (LOCAL_SIZE_LIMIT / 2))  ) {
            throw new RuntimeException("Minimum work-group size "+LOCAL_SIZE_LIMIT/2
                    +" required by this application is not supported on this device.");
        }

        return kernelMap;

    }

    public void bitonicSort(CLCommandQueue queue, CLBuffer<?> dstKey, CLBuffer<?> srcKey, int batch, int arrayLength, int dir) {

        if (arrayLength < 2) {
            throw new IllegalArgumentException("arrayLength was "+arrayLength);
        }

        // TODO Only power-of-two array lengths are supported so far

        dir = (dir != 0) ? 1 : 0;

        CLKernel sortlocal1  = kernels.get(BITONIC_SORT_LOCAL1);
        CLKernel sortlocal   = kernels.get(BITONIC_SORT_LOCAL);
        CLKernel mergeGlobal = kernels.get(BITONIC_MERGE_GLOBAL);
        CLKernel mergeLocal  = kernels.get(BITONIC_MERGE_LOCAL);

        if (arrayLength <= LOCAL_SIZE_LIMIT) {

            //        oclCheckError( (batch * arrayLength) % LOCAL_SIZE_LIMIT == 0, shrTRUE );

            //Launch bitonicSortLocal
            sortlocal.putArgs(dstKey, srcKey)
                     .putArg(arrayLength).putArg(dir).rewind();

            int localWorkSize = LOCAL_SIZE_LIMIT / 2;
            int globalWorkSize = batch * arrayLength / 2;
            queue.put1DRangeKernel(sortlocal, 0, globalWorkSize, localWorkSize);

        } else {

            //Launch bitonicSortLocal1
            sortlocal1.setArgs(dstKey, srcKey);

            int localWorkSize = LOCAL_SIZE_LIMIT / 2;
            int globalWorkSize = batch * arrayLength / 2;

            queue.put1DRangeKernel(sortlocal1, 0, globalWorkSize, localWorkSize);

            for (int size = 2 * LOCAL_SIZE_LIMIT; size <= arrayLength; size <<= 1) {
                for (int stride = size / 2; stride > 0; stride >>= 1) {
                    if (stride >= LOCAL_SIZE_LIMIT) {
                        //Launch bitonicMergeGlobal
                        mergeGlobal.putArgs(dstKey, dstKey)
                                   .putArg(arrayLength).putArg(size).putArg(stride).putArg(dir).rewind();

                        globalWorkSize = batch * arrayLength / 2;
                        queue.put1DRangeKernel(mergeGlobal, 0, globalWorkSize, 0);
                    } else {
                        //Launch bitonicMergeLocal
                        mergeLocal.putArgs(dstKey, dstKey)
                                  .putArg(arrayLength).putArg(stride).putArg(size).putArg(dir).rewind();

                        localWorkSize = LOCAL_SIZE_LIMIT / 2;
                        globalWorkSize = batch * arrayLength / 2;

                        queue.put1DRangeKernel(mergeLocal, 0, globalWorkSize, localWorkSize);
                        break;
                    }
                }
            }
        }
    }

    private void printSnapshot(IntBuffer buffer, int snapshot) {
        for(int i = 0; i < snapshot; i++)
            out.print(buffer.get() + ", ");
        out.println("...; " + buffer.remaining() + " more");
        buffer.rewind();
    }

    private void checkIfSorted(IntBuffer keys) {
        for (int i = 1; i < keys.capacity(); i++) {
            if (keys.get(i - 1) > keys.get(i)) {
                throw new RuntimeException("not sorted "+ keys.get(i - 1) +"!> "+ keys.get(i));
            }
        }
    }

    public static void main(String[] args) throws IOException {
        new BitonicSort();
    }
}
TOP

Related Classes of com.jogamp.opencl.demos.bitonicsort.BitonicSort

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.