Package com.jogamp.opencl.demos.radixsort

Source Code of com.jogamp.opencl.demos.radixsort.Scan

/*
* 22:12 Sunday, February 28 2010
*/
package com.jogamp.opencl.demos.radixsort;

import com.jogamp.opencl.CLBuffer;
import com.jogamp.opencl.CLCommandQueue;
import com.jogamp.opencl.CLContext;
import com.jogamp.opencl.CLKernel;
import com.jogamp.opencl.CLProgram;
import com.jogamp.opencl.CLResource;
import java.io.IOException;

import static com.jogamp.opencl.CLMemory.Mem.*;
import static com.jogamp.opencl.CLProgram.CompilerOptions.*;

/**
*
* @author Michael Bien
*/
public class Scan implements CLResource {

    private final static int MAX_WORKGROUP_INCLUSIVE_SCAN_SIZE = 1024;
    private final static int MAX_LOCAL_GROUP_SIZE = 256;
    private final static int WORKGROUP_SIZE = 256;
    private final static int MAX_BATCH_ELEMENTS = 64 * 1048576;
    private final static int MIN_SHORT_ARRAY_SIZE = 4;
    private final static int MAX_SHORT_ARRAY_SIZE = 4 * WORKGROUP_SIZE;
    private final static int MIN_LARGE_ARRAY_SIZE = 8 * WORKGROUP_SIZE;
    private final static int MAX_LARGE_ARRAY_SIZE = 4 * WORKGROUP_SIZE * WORKGROUP_SIZE;

    private final CLKernel ckScanExclusiveLocal1;
    private final CLKernel ckScanExclusiveLocal2;
    private final CLKernel ckUniformUpdate;

    private final CLCommandQueue queue;
    private final CLProgram program;
    private CLBuffer<?> buffer;

    public Scan(CLCommandQueue queue, int numElements) throws IOException {

        this.queue = queue;

        CLContext context = queue.getContext();
        if (numElements > MAX_WORKGROUP_INCLUSIVE_SCAN_SIZE) {
            buffer = context.createBuffer(numElements / MAX_WORKGROUP_INCLUSIVE_SCAN_SIZE * 4, READ_WRITE);
        }
        program = context.createProgram(getClass().getResourceAsStream("Scan_b.cl"))
                         .build(ENABLE_MAD);

        ckScanExclusiveLocal1 = program.createCLKernel("scanExclusiveLocal1");
        ckScanExclusiveLocal2 = program.createCLKernel("scanExclusiveLocal2");
        ckUniformUpdate       = program.createCLKernel("uniformUpdate");
    }

    // main exclusive scan routine
    void scanExclusiveLarge(CLBuffer<?> dst, CLBuffer<?> src, int batchSize, int arrayLength) {

        //Check power-of-two factorization
        if(!isPowerOf2(arrayLength)) {
            throw new RuntimeException();
        }

        //Check supported size range
        if (!((arrayLength >= MIN_LARGE_ARRAY_SIZE) && (arrayLength <= MAX_LARGE_ARRAY_SIZE))) {
            throw new RuntimeException();
        }

        //Check total batch size limit
        if (!((batchSize * arrayLength) <= MAX_BATCH_ELEMENTS)) {
            throw new RuntimeException();
        }

        scanExclusiveLocal1(dst, src, (batchSize * arrayLength) / (4 * WORKGROUP_SIZE), 4 * WORKGROUP_SIZE);
        scanExclusiveLocal2(buffer, dst, src, batchSize, arrayLength / (4 * WORKGROUP_SIZE));
        uniformUpdate(dst, buffer, (batchSize * arrayLength) / (4 * WORKGROUP_SIZE));
    }

    void scanExclusiveLocal1(CLBuffer<?> dst, CLBuffer<?> src, int n, int size) {

        ckScanExclusiveLocal1.putArg(dst).putArg(src).putArgSize(2 * WORKGROUP_SIZE * 4).putArg(size)
                             .rewind();

        int localWorkSize = WORKGROUP_SIZE;
        int globalWorkSize = (n * size) / 4;

        queue.put1DRangeKernel(ckScanExclusiveLocal1, 0, globalWorkSize, localWorkSize);
    }

    void scanExclusiveLocal2(CLBuffer<?> buffer, CLBuffer<?> dst, CLBuffer<?> src, int n, int size) {

        int elements = n * size;
        ckScanExclusiveLocal2.putArg(buffer).putArg(dst).putArg(src).putArgSize(2 * WORKGROUP_SIZE * 4)
                             .putArg(elements).putArg(size).rewind();

        int localWorkSize = WORKGROUP_SIZE;
        int globalWorkSize = iSnapUp(elements, WORKGROUP_SIZE);

        queue.put1DRangeKernel(ckScanExclusiveLocal2, 0, globalWorkSize, localWorkSize);
    }

    void uniformUpdate(CLBuffer<?> dst, CLBuffer<?> buffer, int n) {

        ckUniformUpdate.setArgs(dst, buffer);

        int localWorkSize  = WORKGROUP_SIZE;
        int globalWorkSize = n * WORKGROUP_SIZE;

        queue.put1DRangeKernel(ckUniformUpdate, 0, globalWorkSize, localWorkSize);
    }

    private int iSnapUp(int dividend, int divisor) {
        return ((dividend % divisor) == 0) ? dividend : (dividend - dividend % divisor + divisor);
    }

    public static boolean isPowerOf2(int x) {
        return ((x - 1) & x) == 0;
    }

    public void release() {
        program.release();

        if(buffer!=null) {
            buffer.release();
        }
    }

    @Override
    public boolean isReleased() {
        return program.isReleased();
    }

    public void close() {
        release();
    }
}
TOP

Related Classes of com.jogamp.opencl.demos.radixsort.Scan

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.