Package com.nativelibs4java.opencl

Examples of com.nativelibs4java.opencl.CLContext


import com.nativelibs4java.util.NIOUtils;

public class SimpleTest {
  public static void main(String[] args) {
    System.out.println("Start");
    CLContext context = JavaCL.createBestContext();
    CLQueue queue = context.createDefaultQueue();

    float[] A = { 1.0f, 2.0f, 3.0f, 4.0f };
    float[] B = { 5.0f, 6.0f, 7.0f, 8.0f };
       
    String myKernelSource = ResourceInputStream.readResourceAsString("org/encog/plugins/opencl/kernels/simple.cl");
   
    FloatBuffer array1 = NIOUtils.directFloats(10, context.getByteOrder());
    FloatBuffer array2 = NIOUtils.directFloats(10, context.getByteOrder());
    FloatBuffer resultArray = NIOUtils.directFloats(10, context.getByteOrder());
   
    array1.put(A);
    array2.put(B);
   
    CLFloatBuffer b1 = context.createFloatBuffer(Usage.Input, array1, true);
    CLFloatBuffer b2 = context.createFloatBuffer(Usage.Input, array2, true);
    CLFloatBuffer b3 = context.createFloatBuffer(Usage.Output, resultArray, false);

    CLProgram program;
    try {
      program = context.createProgram(myKernelSource).build();
      CLKernel kernel = program.createKernel(
              "simpleKernel",
              b1,
              b2,
              b3
View Full Code Here


    }

    public static void main(String[] args) throws IOException, CLBuildException {
     // Create a context with the best double numbers support possible :
     // (try using DeviceFeature.GPU, DeviceFeature.CPU...)
        CLContext context = JavaCL.createBestContext(DeviceFeature.DoubleSupport);
       
        // Create a command queue, if possible able to execute multiple jobs in parallel
        // (out-of-order queues will still respect the CLEvent chaining)
        CLQueue queue = context.createDefaultOutOfOrderQueueIfPossible();

        DFT dft = new DFT(queue);
        //DFT2 dft = new DFT2(queue);

        // Create some fake test data :
View Full Code Here

        System.out.println("");
        System.out.println("");
       
        int dataSize = v1.length;
       
        CLContext context = JavaCL.createBestContext();
        CLQueue queue = context.createDefaultQueue();
        ByteOrder byteOrder = context.getByteOrder();
       
       
        DoubleBuffer v1_b = NIOUtils.directDoubles(dataSize, byteOrder);
        DoubleBuffer v2_b = NIOUtils.directDoubles(v2.length, byteOrder);
       
        FuncoesGPU.preencheBuffer(v1_b, v1);
        FuncoesGPU.preencheBuffer(v2_b, v2);       
       
        CLBuffer<Double> buffer_v1 = context.createDoubleBuffer(CLMem.Usage.Input,v1_b, true);
        CLBuffer<Double> buffer_v2 = context.createDoubleBuffer(CLMem.Usage.Input,v2_b, true);
        CLBuffer<Double> buffer_v3 = context.createDoubleBuffer(CLMem.Usage.Output, dataSize);
       
        String src = IOUtils.readText(new File("matvec.cl"));
        CLProgram program = context.createProgram(src);
        CLKernel prod_escalar = program.createKernel("prod_escalar");
        prod_escalar.setArgs(buffer_v1, buffer_v2, buffer_v3, dataSize, dataSize);
       
        CLEvent prodEvt = prod_escalar.enqueueNDRange(queue, new int[] { dataSize });
View Full Code Here

        //leitura dos dados de entrada
        double[][] entradas = FuncoesCPU.lerArquivoEntradas(Param.nomeArquivo);

        //configuracao do opencl via JavaCL
        CLContext context = JavaCL.createBestContext();
        CLQueue queue = context.createDefaultQueue();
        ByteOrder byteOrder = context.getByteOrder();

        ArrayList<CLBuffer<Double>> clBufferEntradas = new ArrayList<CLBuffer<Double>>(entradas.length);
        for (int i = 0; i < entradas.length; i++) {
            DoubleBuffer dBufferEntrada = NIOUtils.directDoubles(entradas[i].length, byteOrder);
            FuncoesGPU.preencheBuffer(dBufferEntrada, entradas[i]);
            clBufferEntradas.add(context.createDoubleBuffer(CLMem.Usage.Input, dBufferEntrada, true));
        }

        CLBuffer<Double> clBufferEntrada = null;

        //leitura do arquivo cl e compilacao do programa
        String src = IOUtils.readText(new File("matvec.cl"));
        CLProgram program = context.createProgram(src);
        //CLKernel kernel = null;
        //CLEvent prodEvt = null;
        CLKernel kernelProdEscalar = program.createKernel("prod_escalar");
        CLKernel kernelS2 = program.createKernel("s2");
        CLKernel kernelS1 = program.createKernel("s1");
        CLKernel kernelAtualizaPesos3 = program.createKernel("atualiza_pesos_3");
        CLKernel kernelAtualizaPesos2 = program.createKernel("atualiza_pesos_2");
        CLKernel kernelAtualizaPesos1 = program.createKernel("atualiza_pesos_1");

        //----------------------------VARIAVEIS DA 1a CAMADA
        int qtdNeuronios_1 = 12;
        //gerado como vetor para facilitar o uso no kernel
        double[] pesos_1 = FuncoesCPU.gerarVetorAleatorio(qtdNeuronios_1 * qtdNeuronios_1, Param.min, Param.max);
        double[] pesos_1_bias = FuncoesCPU.gerarVetorAleatorio(qtdNeuronios_1, Param.min, Param.max);
        //double[] saida_camada_1 = new double[qtdNeuronios_1];

        DoubleBuffer dBufferPesos1 = NIOUtils.directDoubles(pesos_1.length * pesos_1.length, byteOrder);
        DoubleBuffer dBufferPesosBias1 = NIOUtils.directDoubles(pesos_1_bias.length, byteOrder);

        FuncoesGPU.preencheBuffer(dBufferPesos1, pesos_1);
        FuncoesGPU.preencheBuffer(dBufferPesosBias1, pesos_1_bias);

        CLBuffer<Double> clBufferPesos1 = context.createDoubleBuffer(CLMem.Usage.InputOutput, dBufferPesos1, true);
        CLBuffer<Double> clBufferPesosBias1 = context.createDoubleBuffer(CLMem.Usage.InputOutput, dBufferPesosBias1, true);
        CLBuffer<Double> clBufferSaida1 = context.createDoubleBuffer(CLMem.Usage.InputOutput, qtdNeuronios_1);

        //----------------------------VARIAVEIS DA 2a CAMADA
        int qtdNeuronios_2 = 6;
        double[] pesos_2 = FuncoesCPU.gerarVetorAleatorio(qtdNeuronios_2 * qtdNeuronios_1, Param.min, Param.max);
        double[] pesos_2_bias = FuncoesCPU.gerarVetorAleatorio(qtdNeuronios_2, Param.min, Param.max);
        //double[] saida_camada_2 = new double[qtdNeuronios_2];

        DoubleBuffer dBufferPesos2 = NIOUtils.directDoubles(qtdNeuronios_2 * qtdNeuronios_1, byteOrder);
        DoubleBuffer dBufferPesosBias2 = NIOUtils.directDoubles(pesos_2_bias.length, byteOrder);

        FuncoesGPU.preencheBuffer(dBufferPesos2, pesos_2);
        FuncoesGPU.preencheBuffer(dBufferPesosBias2, pesos_2_bias);

        CLBuffer<Double> clBufferPesos2 = context.createDoubleBuffer(CLMem.Usage.InputOutput, dBufferPesos2, true);
        CLBuffer<Double> clBufferPesosBias2 = context.createDoubleBuffer(CLMem.Usage.InputOutput, dBufferPesosBias2, true);
        CLBuffer<Double> clBufferSaida2 = context.createDoubleBuffer(CLMem.Usage.InputOutput, qtdNeuronios_2);

        //----------------------------VARIAVEIS DA 3a CAMADA
        int qtdNeuronios_3 = 1;
        double[] dvPesos3 = FuncoesCPU.gerarVetorAleatorio(qtdNeuronios_3 * qtdNeuronios_2, Param.min, Param.max);
        double[] dvPesosBias3 = FuncoesCPU.gerarVetorAleatorio(qtdNeuronios_3, Param.min, Param.max);

        DoubleBuffer dBufferPesos3 = NIOUtils.directDoubles(qtdNeuronios_3 * qtdNeuronios_2, byteOrder);
        DoubleBuffer dBufferPesosBias3 = NIOUtils.directDoubles(dvPesosBias3.length, byteOrder);

        FuncoesGPU.preencheBuffer(dBufferPesos3, dvPesos3);
        FuncoesGPU.preencheBuffer(dBufferPesosBias3, dvPesosBias3);

        CLBuffer<Double> clBufferPesos3 = context.createDoubleBuffer(CLMem.Usage.InputOutput, dBufferPesos3, true);
        CLBuffer<Double> clBufferPesosBias3 = context.createDoubleBuffer(CLMem.Usage.InputOutput, dBufferPesosBias3, true);
        CLBuffer<Double> clBufferSaida3 = context.createDoubleBuffer(CLMem.Usage.Output, qtdNeuronios_3);

        // VARIAVEIS DO BACKPROPAGATION
        CLBuffer<Double> clBufferS2 = context.createDoubleBuffer(CLMem.Usage.InputOutput, qtdNeuronios_2);
        CLBuffer<Double> clBufferS1 = context.createDoubleBuffer(CLMem.Usage.InputOutput, qtdNeuronios_1);
        double dSaidaFinal = 0.0;
        double erro = 0.0;
        double s3 = 0.0;

        int epocas = 1000;
        int tamanhoTreinamento = (int) (entradas.length * 0.85);
        int indiceTeste = tamanhoTreinamento;

        long init = System.currentTimeMillis();
        double percentualErro = 0.0;
        for (int epoca = 0; epoca < epocas; epoca++) {

            for (int e = 0; e < tamanhoTreinamento; e++) {

                //TODO possivel ponto de latencia. Para toda entrada vai ter outro
                // 'for' somente para preencher o buffer.
                //DoubleBuffer dBufferEntrada = NIOUtils.directDoubles(entradas[e].length, byteOrder);
                //FuncoesGPU.preencheBuffer(dBufferEntrada, entradas[e]);
                //CLBuffer<Double> clBufferEntrada = context.createDoubleBuffer(CLMem.Usage.Input, dBufferEntrada, true);

                clBufferEntrada = clBufferEntradas.get(0);

                //kernel = program.createKernel("prod_escalar");

                /*args
                 * input
                 * pesos
                 * pesos bias
                 * result
                 * quantidade de neuronios
                 * quantidade pesos por neuronio
                 */
                // PRIMEIRA CAMADA
                kernelProdEscalar.setArgs(clBufferEntrada, clBufferPesos1, clBufferPesosBias1,
                        clBufferSaida1, qtdNeuronios_1, qtdNeuronios_1);
                //aqui diz quantos work itens trabalharao para executar o kernel
                CLEvent prodEvt = kernelProdEscalar.enqueueNDRange(queue, new int[]{qtdNeuronios_1});
                //faz a leitura do 'result'
                //DoubleBuffer dBufferResSaida1 = clBufferSaida1.read(queue,prodEvt);

                // SEGUNDA CAMADA
                clBufferSaida1 = context.createDoubleBuffer(CLMem.Usage.Input,
                        (DoubleBuffer) clBufferSaida1.read(queue, prodEvt), true);

                //TODO this passes more neurons than there are. Check later,
                //since it worked in the tests. Applies to both the arguments and the kernel
                kernelProdEscalar.setArgs(clBufferSaida1, clBufferPesos2, clBufferPesosBias2,
                        clBufferSaida2, qtdNeuronios_1, qtdNeuronios_1);

                prodEvt = kernelProdEscalar.enqueueNDRange(queue, new int[]{qtdNeuronios_1});

                //DoubleBuffer dBufferResSaida2 = clBufferSaida2.read(queue, prodEvt);

                //TERCEIRA CAMADA
                clBufferSaida2 = context.createDoubleBuffer(CLMem.Usage.Input,
                        (DoubleBuffer) clBufferSaida2.read(queue, prodEvt), true);

                //TODO this passes more neurons than there are. Check later,
                //since it worked in the tests. Applies to both the arguments and the kernel
                kernelProdEscalar.setArgs(clBufferSaida2, clBufferPesos3, clBufferPesosBias3,
                        clBufferSaida3, qtdNeuronios_2, qtdNeuronios_2);

                prodEvt = kernelProdEscalar.enqueueNDRange(queue, new int[]{qtdNeuronios_2});

                //DoubleBuffer dBufferSaidaFinal = clBufferSaida3.read(queue,prodEvt);

                // BACKPROPAGATION
                dSaidaFinal = ((DoubleBuffer) clBufferSaida3.read(queue, prodEvt)).get(0);
                erro = Param.target - dSaidaFinal;
                percentualErro = Math.abs((erro / Param.target) * 100);
                s3 = -2 * FuncoesCPU.derivativeSigmoid(dSaidaFinal) * erro;

                //kernel = program.createKernel("s2");
                kernelS2.setArgs(clBufferPesos3, dSaidaFinal, erro,
                        clBufferSaida2, clBufferS2);

                prodEvt = kernelS2.enqueueNDRange(queue, new int[]{qtdNeuronios_2});
                //DoubleBuffer dBufferResS2 = clBufferS2.read(queue,prodEvt);


                clBufferS2 = context.createDoubleBuffer(CLMem.Usage.InputOutput,
                        (DoubleBuffer) clBufferS2.read(queue, prodEvt), true);
                //kernel = program.createKernel("s1");
                kernelS1.setArgs(clBufferPesos2,
                        clBufferS2,
                        clBufferSaida1,
                        clBufferS1,
                        qtdNeuronios_2);

                prodEvt = kernelS1.enqueueNDRange(queue, new int[]{qtdNeuronios_1});
                //DoubleBuffer dBufferResS1 = clBufferS1.read(queue,prodEvt);

                clBufferS1 = context.createDoubleBuffer(CLMem.Usage.InputOutput,
                        (DoubleBuffer) clBufferS1.read(queue, prodEvt), true);


                //ATUALIZANDO OS PESOS
                //CAMADA 3
                //kernel = program.createKernel("atualiza_pesos_3");
                kernelAtualizaPesos3.setArgs(clBufferPesos3, clBufferPesos2, dSaidaFinal,
                        erro, Param.taxaAprendizado);
                prodEvt = kernelAtualizaPesos3.enqueueNDRange(queue, new int[]{qtdNeuronios_2});
                //DoubleBuffer dBufferResPesos3 = clBufferPesos3.read(queue,prodEvt);

                clBufferPesos3 = context.createDoubleBuffer(CLMem.Usage.InputOutput,
                        (DoubleBuffer) clBufferPesos3.read(queue, prodEvt), true);

                dBufferPesosBias3.put(0, dBufferPesosBias3.get(0) - (Param.taxaAprendizado * s3 * 1));
                clBufferPesosBias3 = context.createDoubleBuffer(CLMem.Usage.InputOutput, dBufferPesosBias3, true);

                //CAMADA 2
                //kernel = program.createKernel("atualiza_pesos_2");
                kernelAtualizaPesos2.setArgs(clBufferPesos2, clBufferS2, clBufferSaida1, qtdNeuronios_1,
                        Param.taxaAprendizado, clBufferPesosBias2);

                prodEvt = kernelAtualizaPesos2.enqueueNDRange(queue, new int[]{qtdNeuronios_2});
                //DoubleBuffer dBufferResPesos2 = clBufferPesos2.read(queue,prodEvt);
                //DoubleBuffer dBufferResPesosBias2 = clBufferPesosBias2.read(queue,prodEvt);

                clBufferPesos2 = context.createDoubleBuffer(CLMem.Usage.InputOutput,
                        (DoubleBuffer) clBufferPesos2.read(queue, prodEvt), true);
                clBufferPesosBias2 = context.createDoubleBuffer(CLMem.Usage.InputOutput,
                        (DoubleBuffer) clBufferPesosBias2.read(queue, prodEvt), true);

                //CAMADA 1
                //kernel = program.createKernel("atualiza_pesos_1");
                kernelAtualizaPesos1.setArgs(clBufferPesos1, clBufferS1, clBufferEntrada,
                        qtdNeuronios_1, Param.taxaAprendizado, clBufferPesosBias1);
                prodEvt = kernelAtualizaPesos1.enqueueNDRange(queue, new int[]{qtdNeuronios_1});
                //DoubleBuffer dBufferResPesos1 = clBufferPesos1.read(queue,prodEvt);
                //DoubleBuffer dBufferResPesosBias1 = clBufferPesosBias1.read(queue,prodEvt);

                clBufferPesos1 = context.createDoubleBuffer(CLMem.Usage.InputOutput,
                        (DoubleBuffer) clBufferPesos1.read(queue, prodEvt), true);
                clBufferPesosBias1 = context.createDoubleBuffer(CLMem.Usage.InputOutput,
                        (DoubleBuffer) clBufferPesosBias1.read(queue, prodEvt), true);

            }
        }

        System.out.println("TESTE");
        for (int e = indiceTeste; e < entradas.length; e++) {
            // PRIMEIRA CAMADA
            kernelProdEscalar.setArgs(clBufferEntrada, clBufferPesos1, clBufferPesosBias1,
                    clBufferSaida1, qtdNeuronios_1, qtdNeuronios_1);
            //aqui diz quantos work itens trabalharao para executar o kernel
            CLEvent prodEvt = kernelProdEscalar.enqueueNDRange(queue, new int[]{qtdNeuronios_1});
            //faz a leitura do 'result'
            //DoubleBuffer dBufferResSaida1 = clBufferSaida1.read(queue,prodEvt);

            // SEGUNDA CAMADA
            clBufferSaida1 = context.createDoubleBuffer(CLMem.Usage.Input,
                    (DoubleBuffer) clBufferSaida1.read(queue, prodEvt), true);

            //TODO this passes more neurons than there are. Check later,
            //since it worked in the tests. Applies to both the arguments and the kernel
            kernelProdEscalar.setArgs(clBufferSaida1, clBufferPesos2, clBufferPesosBias2,
                    clBufferSaida2, qtdNeuronios_1, qtdNeuronios_1);

            prodEvt = kernelProdEscalar.enqueueNDRange(queue, new int[]{qtdNeuronios_1});

            //DoubleBuffer dBufferResSaida2 = clBufferSaida2.read(queue, prodEvt);

            //TERCEIRA CAMADA
            clBufferSaida2 = context.createDoubleBuffer(CLMem.Usage.Input,
                    (DoubleBuffer) clBufferSaida2.read(queue, prodEvt), true);

            //TODO this passes more neurons than there are. Check later,
            //since it worked in the tests. Applies to both the arguments and the kernel
            kernelProdEscalar.setArgs(clBufferSaida2, clBufferPesos3, clBufferPesosBias3,
View Full Code Here

TOP

Related Classes of com.nativelibs4java.opencl.CLContext

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.