Package org.apache.pig.impl.io

Examples of org.apache.pig.impl.io.ReadToEndLoader
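
ReadToEndLoader wraps another LoadFunc and reads an input location from a given split through to the end, which makes it useful for reading a file outside a running map or reduce task. Every snippet below follows the same pattern: construct the loader, then call getNext() until it returns null. A minimal sketch of that pattern, where PigStorage as the wrapped load func and the /tmp/example path are illustrative assumptions:

import org.apache.hadoop.conf.Configuration;
import org.apache.pig.builtin.PigStorage;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.io.ReadToEndLoader;

public class ReadToEndLoaderExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // wrap PigStorage; start at split index 0 and read through the last split
        ReadToEndLoader loader =
                new ReadToEndLoader(new PigStorage(), conf, "/tmp/example", 0);
        Tuple t;
        while ((t = loader.getNext()) != null) { // null marks end of input
            System.out.println(t);
        }
    }
}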


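Reading a stored scalar back from an intermediate file (this appears to be Pig's ReadScalars UDF); the code passes Hadoop job credentials through, reads the file with InterStorage, and verifies that the scalar relation has exactly one row: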
                value = t1.get(pos);
                valueLoaded = true;
                return value;
            }

            ReadToEndLoader loader;
            try {
                pos = DataType.toInteger(input.get(0));
                scalarfilename = DataType.toString(input.get(1));

                // Hadoop security needs this property to be set
                Configuration conf = UDFContext.getUDFContext().getJobConf();
                if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
                    conf.set(MRConfiguration.JOB_CREDENTIALS_BINARY,
                            System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
                }
                loader = new ReadToEndLoader(
                        new InterStorage(), conf, scalarfilename, 0);
            } catch (Exception e) {
                throw new ExecException("Failed to open file '" + scalarfilename
                        + "'; error = " + e.getMessage());
            }
            try {
                Tuple t1 = loader.getNext();
                if(t1 == null){
                    log.warn("No scalar field to read, returning null");
                    valueLoaded = true;
                    return null;
                }
                value = t1.get(pos);
                Tuple t2 = loader.getNext();
                if(t2 != null){
                    String msg = "Scalar has more than one row in the output. "
                        + "1st : " + t1 + ", 2nd :" + t2;
                    throw new ExecException(msg);
                }
                // ... (snippet continues)


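Constructing a loader over an explicit set of input splits, likely for the right side of a merge join given the rightLoaderFuncSpec name: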
            conf.set(MRConfiguration.JOB_CREDENTIALS_BINARY,
                    System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
        }
       
        //create ReadToEndLoader that will read the given splits in order
        loader = new ReadToEndLoader((LoadFunc)PigContext.instantiateFuncFromSpec(rightLoaderFuncSpec),
                conf, inpLocation, splitsToBeRead);
    }
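
Judging from the comment, the final splitsToBeRead argument names the split indices to read, so the caller can visit just those splits, in order, rather than the whole input.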

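Reading the quantiles file produced by a sampling job, apparently to set up range partitioning for an order-by: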
            }

            MapRedUtil.copyTmpFileConfigurationValues(job, conf);
            conf.set(MapRedUtil.FILE_SYSTEM_NAME, "file:///");

            ReadToEndLoader loader = new ReadToEndLoader(Utils.getTmpFileStorageObject(conf),
                    conf, quantilesFile, 0);
            Tuple t = loader.getNext();
            if (t != null) {
                // the quantiles file holds a tuple of the form
                // (numQuantiles, bag of samples), where numQuantiles
                // is the reduce parallelism
                quantileMap = (Map<String, Object>) t.get(0);
                // ... (snippet continues)

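Reading a key-distribution file from a sampling job, presumably to build the reducer map for a skewed join: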
        copyTmpFileConfigurationValues(PigMapReduce.sJobConfInternal.get(), conf);

        conf.set(MapRedUtil.FILE_SYSTEM_NAME, "file:///");

        ReadToEndLoader loader = new ReadToEndLoader(Utils.getTmpFileStorageObject(PigMapReduce.sJobConfInternal.get()), conf,
                keyDistFile, 0);
        DataBag partitionList;
        Tuple t = loader.getNext();
        if (t == null) {
            // this could happen if the input directory for sampling is empty
            log.warn("Empty dist file: " + keyDistFile);
            return reducerMap;
        }
        // ... (snippet continues)

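Binding a loader to a file at physical-operator setup time, using the overload that also passes the UDF signature: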
    /**
     * Set up the loader by
     * 1) Instantiating the load func
     * 2) Opening an input stream to the specified file and
     * 3) Binding to the input stream at the specified offset.
     * @throws IOException
     */
    public void setUp() throws IOException{
        loader = new ReadToEndLoader((LoadFunc)
                PigContext.instantiateFuncFromSpec(lFile.getFuncSpec()),
                ConfigurationUtil.toConfiguration(pc.getProperties()),
                lFile.getFileName(), 0, signature);
    }
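
The extra signature argument presumably lets the wrapped load func find its UDFContext properties when it is instantiated outside a task.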

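Wrapping the original load function of an output FileSpec so a job's result file can be read back: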
        try{
             LoadFunc originalLoadFunc =
                 (LoadFunc)PigContext.instantiateFuncFromSpec(
                         outFileSpec.getFuncSpec());
            
             p = (LoadFunc) new ReadToEndLoader(originalLoadFunc,
                     ConfigurationUtil.toConfiguration(
                     pigContext.getProperties()), outFileSpec.getFileName(), 0, pigContext);

        }catch (Exception e){
            int errCode = 2088;
            // ... (snippet continues)

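A close variant of the previous snippet, reading back a store operator's output file via its FileSpec: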
        }
        try {
            LoadFunc originalLoadFunc = (LoadFunc) PigContext
                    .instantiateFuncFromSpec(store.getSFile().getFuncSpec());

            p = (LoadFunc) new ReadToEndLoader(originalLoadFunc,
                    ConfigurationUtil.toConfiguration(pigContext
                            .getProperties()), store.getSFile().getFileName(),
                    0);

        } catch (Exception e) {
            // ... (snippet continues)

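Inferring a schema from data: the loader fetches the first record of the input and the field count is derived from it: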
                // (such as intermediate file). Just return null - the
                // same way as we would if we did not get a valid record
                return null;
            }
        }
        ReadToEndLoader loader = new ReadToEndLoader(wrappedLoadFunc, conf, location, 0);
        // get the first record from the input file
        // and figure out the schema from the data in
        // the first record
        Tuple t = loader.getNext();
        if (t == null) {
            // we couldn't get a valid record from the input
            return null;
        }
        int numFields = t.size();
        // ... (snippet continues)

