Package org.apache.pig.impl.io

Examples of org.apache.pig.impl.io.ReadToEndLoader

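ReadToEndLoader wraps another LoadFunc so that an input location can be read
from a given split index straight through to the end, without the usual
InputFormat/RecordReader setup. A minimal usage sketch follows (the
constructor and getNext() calls mirror the examples below; the file path and
the surrounding class are hypothetical):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.pig.LoadFunc;
    import org.apache.pig.builtin.PigStorage;
    import org.apache.pig.data.Tuple;
    import org.apache.pig.impl.io.ReadToEndLoader;

    public class ReadToEndLoaderSketch {
        public static void main(String[] args) throws Exception {
            // Wrap PigStorage and read "some-input.txt" from split 0 to the end.
            // "some-input.txt" is a placeholder path, not taken from the examples.
            Configuration conf = new Configuration();
            LoadFunc loader = new ReadToEndLoader(new PigStorage(), conf,
                    "some-input.txt", 0);
            Tuple t;
            while ((t = loader.getNext()) != null) {
                // getNext() returns null once the input is exhausted
                System.out.println(t);
            }
        }
    }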

    public void testLFPig() throws Exception {
        Util.createInputFile(cluster, "input.txt", new String[]
                                        {"this:is:delimited:by:a:colon\n"});
        int arity1 = 6;
        LoadFunc lf = new PigStorage(":");
        LoadFunc p1 = new ReadToEndLoader(lf, ConfigurationUtil.
                toConfiguration(cluster.getProperties()), "input.txt", 0);
        Tuple f1 = p1.getNext();
        assertTrue(f1.size() == arity1);
        Util.deleteFile(cluster, "input.txt");
       
        int LOOP_COUNT = 100;
        String[] input = new String[LOOP_COUNT * LOOP_COUNT];
        int n = 0;
        for (int i = 0; i < LOOP_COUNT; i++) {
            for (int j = 0; j < LOOP_COUNT; j++) {
                input[n++] = (i + "\t" + i + "\t" + j % 2);
            }
        }
        Util.createInputFile(cluster, "input.txt", input);

        LoadFunc p15 = new ReadToEndLoader(new PigStorage(), ConfigurationUtil.
                toConfiguration(cluster.getProperties()), "input.txt", 0);
       
        int count = 0;
        while (true) {
            Tuple f15 = p15.getNext();
            if (f15 == null)
                break;
            count++;
            assertEquals(3, f15.size());
        }
        assertEquals(LOOP_COUNT * LOOP_COUNT, count);
        Util.deleteFile(cluster, "input.txt");
       
        String input2 = ":this:has:a:leading:colon\n";
        int arity2 = 6;
        Util.createInputFile(cluster, "input.txt", new String[] {input2});
        LoadFunc p2 = new ReadToEndLoader(new PigStorage(":"), ConfigurationUtil.
                toConfiguration(cluster.getProperties()), "input.txt", 0);
        Tuple f2 = p2.getNext();
        assertTrue(f2.size() == arity2);
        Util.deleteFile(cluster, "input.txt");
       
        String input3 = "this:has:a:trailing:colon:\n";
        int arity3 = 6;
        Util.createInputFile(cluster, "input.txt", new String[] {input3});
        LoadFunc p3 = new ReadToEndLoader(new PigStorage(":"), ConfigurationUtil.
                toConfiguration(cluster.getProperties()), "input.txt", 0);
        Tuple f3 = p3.getNext();
        assertTrue(f3.size() == arity3);
        Util.deleteFile(cluster, "input.txt");
    }
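The three arity assertions above make the same point: PigStorage(":") yields six fields for each of these lines, with a leading or trailing delimiter simply producing an empty first or last field rather than changing the arity.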


        Util.createInputFile(cluster,
                "/tmp/testLFTextdir1/testLFTextdir2/testLFTest-input1.txt",
                new String[] {input1});
        // check that loading the top-level dir still reads a file a couple
        // of subdirs below
        LoadFunc text1 = new ReadToEndLoader(new TextLoader(), ConfigurationUtil.
                toConfiguration(cluster.getProperties()), "/tmp/testLFTextdir1", 0);
        Tuple f1 = text1.getNext();
        Tuple f2 = text1.getNext();
        assertTrue(expected1.equals(f1.get(0).toString()) &&
            expected2.equals(f2.get(0).toString()));
        Util.deleteFile(cluster, "testLFTest-input1.txt");
        Util.createInputFile(cluster, "testLFTest-input2.txt");
        LoadFunc text2 = new ReadToEndLoader(new TextLoader(), ConfigurationUtil.
                toConfiguration(cluster.getProperties()), "testLFTest-input2.txt", 0);
        Tuple f3 = text2.getNext();
        assertTrue(f3 == null);
        Util.deleteFile(cluster, "testLFTest-input2.txt");
    }
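Two behaviors are exercised here: ReadToEndLoader (via the wrapped TextLoader) reads a file sitting two subdirectories below the location it was given, and getNext() returns null when the input, here an empty file, has nothing left to produce.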

        String query = "a = load 'testSFPig-input.txt';" +
            "store a into 'testSFPig-output.txt';";
        pigServer.setBatchOn();
        Util.registerMultiLineQuery(pigServer, query);
        pigServer.executeBatch();
        LoadFunc lfunc = new ReadToEndLoader(new PigStorage(), ConfigurationUtil.
                toConfiguration(cluster.getProperties()), "testSFPig-output.txt", 0);
        Tuple f2 = lfunc.getNext();
       
        assertEquals(f1, f2);
        Util.deleteFile(cluster, "testSFPig-input.txt");
        Util.deleteFile(cluster, "testSFPig-output.txt");
    }
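This is a common testing pattern with ReadToEndLoader: run a store through the Pig server, then read the output back with a loader wrapping the same storage function and compare the tuples directly.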

            conf.set("mapreduce.job.credentials.binary",
                    System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
        }
       
        // create a ReadToEndLoader that will read the given splits in order
        loader = new ReadToEndLoader((LoadFunc)PigContext.instantiateFuncFromSpec(rightLoaderFuncSpec),
                conf, inpLocation, splitsToBeRead);
    }
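Note the fourth argument: instead of a single starting split index, this constructor variant takes the array splitsToBeRead, so the caller can replay an explicit list of splits in order; here it is used for a right-hand-side loader built from rightLoaderFuncSpec.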

        if (value == null) {
            if (input == null || input.size() == 0)
                return null;

            int pos;
            ReadToEndLoader loader;
            try {
                pos = DataType.toInteger(input.get(0));
                scalarfilename = DataType.toString(input.get(1));
               
                // Hadoop security needs this property to be set
                Configuration conf = UDFContext.getUDFContext().getJobConf();
                if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
                    conf.set("mapreduce.job.credentials.binary",
                            System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
                }
                loader = new ReadToEndLoader(
                        new InterStorage(), conf, scalarfilename, 0);
            } catch (Exception e) {
                throw new ExecException("Failed to open file '" + scalarfilename
                        + "'; error = " + e.getMessage());
            }
            try {
                Tuple t1 = loader.getNext();
                if (t1 == null) {
                    log.warn("No scalar field to read, returning null");
                    return null;
                }
                value = t1.get(pos);
                Tuple t2 = loader.getNext();
                if (t2 != null) {
                    String msg = "Scalar has more than one row in the output. "
                        + "1st: " + t1 + ", 2nd: " + t2;
                    throw new ExecException(msg);
                }
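The contract enforced here is that a scalar file holds exactly one row: the first getNext() supplies the value at position pos, and a non-null second tuple is reported as an ExecException.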

        try {
            LoadFunc originalLoadFunc =
                    (LoadFunc) PigContext.instantiateFuncFromSpec(
                            outFileSpec.getFuncSpec());

            p = new ReadToEndLoader(originalLoadFunc,
                    ConfigurationUtil.toConfiguration(
                            pigContext.getProperties()),
                    outFileSpec.getFileName(), 0);

        } catch (Exception e) {
            int errCode = 2088;

            if (PigMapReduce.sJobConf.get("pig.tmpfilecompression.codec") != null)
                conf.set("pig.tmpfilecompression.codec",
                        PigMapReduce.sJobConf.get("pig.tmpfilecompression.codec"));
        }
        conf.set(MapRedUtil.FILE_SYSTEM_NAME, "file:///");

        ReadToEndLoader loader = new ReadToEndLoader(Utils.getTmpFileStorageObject(PigMapReduce.sJobConf), conf,
                keyDistFile, 0);
        DataBag partitionList;
        Tuple t = loader.getNext();
        if (t == null) {
            // this could happen if the input directory for sampling is empty
            log.warn("Empty dist file: " + keyDistFile);
            return reducerMap;
        }
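The null check matters: the distribution file can be empty when the sampling input itself was empty, in which case the method logs a warning and returns the reducerMap unchanged.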

                // (such as intermediate file). Just return null - the
                // same way as we would if we did not get a valid record
                return null;
            }
        }
        ReadToEndLoader loader = new ReadToEndLoader(wrappedLoadFunc, conf, location, 0);
        // get the first record from the input file
        // and figure out the schema from the data in
        // the first record
        Tuple t = loader.getNext();
        if (t == null) {
            // we couldn't get a valid record from the input
            return null;
        }
        int numFields = t.size();
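Here ReadToEndLoader drives schema discovery: only the first record is fetched, and its field count (numFields) determines the width of the inferred schema; if no valid record can be read, no schema is returned.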

                if (configuration.get("pig.tmpfilecompression.codec") != null)
                    conf.set("pig.tmpfilecompression.codec",
                            configuration.get("pig.tmpfilecompression.codec"));
            }
            conf.set(MapRedUtil.FILE_SYSTEM_NAME, "file:///");
           
            ReadToEndLoader loader = new ReadToEndLoader(Utils.getTmpFileStorageObject(conf),
                    conf, quantilesFile, 0);
            DataBag quantilesList;
            Tuple t = loader.getNext();
            if (t != null) {
                // the quantiles file holds a single tuple of the form:
                // (numQuantiles, bag of samples)
                // numQuantiles here is the reduce parallelism
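Like the key-distribution snippet above, this quantiles reader pins the file system to file:///, carries over the pig.tmpfilecompression settings, and pulls a single summary tuple back through ReadToEndLoader.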

