Package org.apache.pig

Examples of org.apache.pig.LoadFunc$RequiredField


        // Get the total number of splits here and set it to the
        // configuration object. This number is needed by
        // PoissonSampleLoader to compute the number of samples
        int n = pigSplit.getTotalSplits();
        context.getConfiguration().setInt("pig.mapsplits.count", n);
        Configuration conf = context.getConfiguration();
        LoadFunc loadFunc = getLoadFunc(pigSplit.getInputIndex(), conf);
        // Pass loader signature to LoadFunc and to InputFormat through
        // the conf
        passLoadSignature(loadFunc, pigSplit.getInputIndex(), conf);
       
        // merge entries from split specific conf into the conf we got
        PigInputFormat.mergeSplitSpecificConf(loadFunc, pigSplit, conf);
       
        // for backward compatibility
        PigInputFormat.sJob = conf;
       
        InputFormat inputFormat = loadFunc.getInputFormat();
        // now invoke the createRecordReader() with this "adjusted" conf
        RecordReader reader = inputFormat.createRecordReader(
                pigSplit.getWrappedSplit(), context);
       
        return new PigRecordReader(reader, loadFunc, conf);
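
The wiring above is the InputFormat side of the contract. Below is a minimal sketch of the LoadFunc side, written against the Hadoop 0.20-era API these snippets use; the class name SketchLoader and its behavior are illustrative, not taken from Pig.

    import java.io.IOException;

    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.InputFormat;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.RecordReader;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.pig.LoadFunc;
    import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
    import org.apache.pig.data.Tuple;
    import org.apache.pig.data.TupleFactory;

    public class SketchLoader extends LoadFunc {
        private RecordReader reader;
        private String signature;

        // Receives the signature that passLoadSignature() above pushes
        // through the conf.
        @Override
        public void setUDFContextSignature(String signature) {
            this.signature = signature;
        }

        @Override
        public void setLocation(String location, Job job) throws IOException {
            // FileInputFormat records the location in mapred.input.dir
            FileInputFormat.setInputPaths(job, location);
        }

        // This is what the createRecordReader() call above is invoked on.
        @Override
        public InputFormat getInputFormat() {
            return new TextInputFormat();
        }

        // Pig hands back the reader it wrapped in PigRecordReader.
        @Override
        public void prepareToRead(RecordReader reader, PigSplit split) {
            this.reader = reader;
        }

        @Override
        public Tuple getNext() throws IOException {
            try {
                if (!reader.nextKeyValue()) {
                    return null; // end of input
                }
                Text line = (Text) reader.getCurrentValue();
                return TupleFactory.getInstance().newTuple(line.toString());
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
        }
    }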


                // Clone the conf per input: if the underlying InputFormat
                // stores its input location into the configuration (for
                // example, FileInputFormat stores this in mapred.input.dir
                // in the conf), then for different inputs, the loaders
                // don't end up overwriting the same conf.
                FuncSpec loadFuncSpec = inputs.get(i).getFuncSpec();
                LoadFunc loadFunc = (LoadFunc) PigContext.instantiateFuncFromSpec(
                        loadFuncSpec);
                Configuration confClone = new Configuration(conf);
                Job inputSpecificJob = new Job(confClone);
                // Pass loader signature to LoadFunc and to InputFormat through
                // the conf
                passLoadSignature(loadFunc, i, inputSpecificJob.getConfiguration());
                loadFunc.setLocation(inputs.get(i).getFileName(),
                        inputSpecificJob);
                // The above setLocation call could write to the conf within
                // the inputSpecificJob - use this updated conf
               
                // get the InputFormat from it and ask for splits
                InputFormat inpFormat = loadFunc.getInputFormat();
                List<InputSplit> oneInputSplits = inpFormat.getSplits(
                        new JobContext(inputSpecificJob.getConfiguration(),
                                jobcontext.getJobID()));
                List<PigSplit> oneInputPigSplits = getPigSplits(
                        oneInputSplits, i, inpTargets.get(i), conf);
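
The clone-per-input dance above matters because FileInputFormat-style InputFormats write their input path into whatever conf they are given. A small hedged illustration of the effect, with made-up paths:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

    public class ConfCloneSketch {
        public static void main(String[] args) throws Exception {
            Configuration shared = new Configuration();

            // One clone per input, as in the snippet above
            Job jobA = new Job(new Configuration(shared));
            FileInputFormat.setInputPaths(jobA, new Path("/data/a"));

            Job jobB = new Job(new Configuration(shared));
            FileInputFormat.setInputPaths(jobB, new Path("/data/b"));

            // Each clone carries only its own location; without the clones,
            // the second setInputPaths() would overwrite the first.
            System.out.println(jobA.getConfiguration().get("mapred.input.dir"));
            System.out.println(jobB.getConfiguration().get("mapred.input.dir"));
        }
    }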

     * 3) Binding to the input stream at the specified offset.
     * @throws IOException
     */
    public void setUp() throws IOException {
        String filename = lFile.getFileName();
        LoadFunc origloader =
            (LoadFunc)PigContext.instantiateFuncFromSpec(lFile.getFuncSpec());
        loader = new ReadToEndLoader(origloader,
                ConfigurationUtil.toConfiguration(pc.getProperties()),
                filename,
                0);
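
ReadToEndLoader is the bridge used throughout these examples: it wraps an ordinary LoadFunc so its output can be read outside a MapReduce task, from a given split index through to the end of the input. A minimal sketch, assuming a local file at the illustrative path /tmp/in.txt:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.pig.LoadFunc;
    import org.apache.pig.builtin.PigStorage;
    import org.apache.pig.data.Tuple;
    import org.apache.pig.impl.io.ReadToEndLoader;

    public class ReadAllSketch {
        public static void main(String[] args) throws Exception {
            // Start at split 0 and read everything to the end
            LoadFunc loader = new ReadToEndLoader(
                    new PigStorage(), new Configuration(), "/tmp/in.txt", 0);
            Tuple t;
            while ((t = loader.getNext()) != null) { // null marks end of input
                System.out.println(t);
            }
        }
    }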

    @Test
    public void testLFPig() throws Exception {
        Util.createInputFile(cluster, "input.txt", new String[]
                                        {"this:is:delimited:by:a:colon\n"});
        int arity1 = 6;
        LoadFunc lf = new PigStorage(":");
        LoadFunc p1 = new ReadToEndLoader(lf, ConfigurationUtil.
                toConfiguration(cluster.getProperties()), "input.txt", 0);
        Tuple f1 = p1.getNext();
        assertEquals(arity1, f1.size());
        Util.deleteFile(cluster, "input.txt");
       
        int LOOP_COUNT = 100;
        String[] input = new String[LOOP_COUNT * LOOP_COUNT];
        int n = 0;
        for (int i = 0; i < LOOP_COUNT; i++) {
            for (int j = 0; j < LOOP_COUNT; j++) {
                input[n++] = (i + "\t" + i + "\t" + j % 2);
            }
        }
        Util.createInputFile(cluster, "input.txt", input);

        LoadFunc p15 = new ReadToEndLoader(new PigStorage(), ConfigurationUtil.
                toConfiguration(cluster.getProperties()), "input.txt", 0);
       
        int count = 0;
        while (true) {
            Tuple f15 = p15.getNext();
            if (f15 == null)
                break;
            count++;
            assertEquals(3, f15.size());
        }
        assertEquals(LOOP_COUNT * LOOP_COUNT, count);
        Util.deleteFile(cluster, "input.txt");
       
        String input2 = ":this:has:a:leading:colon\n";
        int arity2 = 6;
        Util.createInputFile(cluster, "input.txt", new String[] {input2});
        LoadFunc p2 = new ReadToEndLoader(new PigStorage(":"), ConfigurationUtil.
                toConfiguration(cluster.getProperties()), "input.txt", 0);
        Tuple f2 = p2.getNext();
        assertEquals(arity2, f2.size());
        Util.deleteFile(cluster, "input.txt");
       
        String input3 = "this:has:a:trailing:colon:\n";
        int arity3 = 6;
        Util.createInputFile(cluster, "input.txt", new String[] {input3});
        LoadFunc p3 = new ReadToEndLoader(new PigStorage(":"), ConfigurationUtil.
                toConfiguration(cluster.getProperties()), "input.txt", 0);
        Tuple f3 = p3.getNext();
        assertEquals(arity3, f3.size());
        Util.deleteFile(cluster, "input.txt");
    }
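
The loops above all rely on the same contract: getNext() returns one tuple per record and null at end of input. (Note also why arity2 and arity3 are both 6: PigStorage treats a leading or trailing delimiter as delimiting an empty field.) A small helper capturing that read-until-null pattern; the class and method names are illustrative:

    import java.io.IOException;
    import org.apache.pig.LoadFunc;

    public final class TupleCount {
        // Reads until getNext() returns null, the end-of-input signal the
        // test loop above depends on.
        public static int countTuples(LoadFunc loader) throws IOException {
            int n = 0;
            while (loader.getNext() != null) {
                n++;
            }
            return n;
        }
    }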

        Util.createInputFile(cluster,
                "/tmp/testLFTextdir1/testLFTextdir2/testLFTest-input1.txt",
                new String[] {input1});
        // check that loading the top-level dir still reads files a couple
        // of subdirs below
        LoadFunc text1 = new ReadToEndLoader(new TextLoader(), ConfigurationUtil.
                toConfiguration(cluster.getProperties()), "/tmp/testLFTextdir1", 0);
        Tuple f1 = text1.getNext();
        Tuple f2 = text1.getNext();
        assertTrue(expected1.equals(f1.get(0).toString()) &&
            expected2.equals(f2.get(0).toString()));
        Util.deleteFile(cluster, "/tmp/testLFTextdir1");
        Util.createInputFile(cluster, "testLFTest-input2.txt", new String[] {});
        LoadFunc text2 = new ReadToEndLoader(new TextLoader(), ConfigurationUtil.
                toConfiguration(cluster.getProperties()), "testLFTest-input2.txt", 0);
        Tuple f3 = text2.getNext();
        assertNull(f3);
        Util.deleteFile(cluster, "testLFTest-input2.txt");
    }
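
TextLoader returns each input line as a single-field tuple, which is why the assertions compare f1.get(0) against the expected strings. A hedged sketch of that contract, with an illustrative directory path:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.pig.LoadFunc;
    import org.apache.pig.builtin.TextLoader;
    import org.apache.pig.data.Tuple;
    import org.apache.pig.impl.io.ReadToEndLoader;

    public class TextLineSketch {
        public static void main(String[] args) throws Exception {
            // The test above shows that a top-level dir path still reaches
            // files a couple of subdirectories below.
            LoadFunc text = new ReadToEndLoader(new TextLoader(),
                    new Configuration(), "/tmp/some-dir", 0);
            Tuple t = text.getNext();
            if (t != null) {
                String line = t.get(0).toString(); // one field per line
                System.out.println(line);
            }
        }
    }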

        String query = "a = load 'testSFPig-input.txt';" +
            "store a into 'testSFPig-output.txt';";
        pigServer.setBatchOn();
        Util.registerMultiLineQuery(pigServer, query);
        pigServer.executeBatch();
        LoadFunc lfunc = new ReadToEndLoader(new PigStorage(), ConfigurationUtil.
                toConfiguration(cluster.getProperties()), "testSFPig-output.txt", 0);
        Tuple f2 = lfunc.getNext();
       
        assertEquals(f1, f2);
        Util.deleteFile(cluster, "testSFPig-input.txt");
        Util.deleteFile(cluster, "testSFPig-output.txt");
    }
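
The pattern above round-trips data through a store and reads the result back with ReadToEndLoader to verify it. A minimal sketch of the batch-execution half, assuming local mode and illustrative paths:

    import org.apache.pig.ExecType;
    import org.apache.pig.PigServer;

    public class StoreSketch {
        public static void main(String[] args) throws Exception {
            PigServer pig = new PigServer(ExecType.LOCAL);
            pig.setBatchOn();                         // queue statements
            pig.registerQuery("a = load '/tmp/in.txt';");
            pig.registerQuery("store a into '/tmp/out';");
            pig.executeBatch();                       // run the whole batch
        }
    }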

            //Process the POLoads
            List<POLoad> lds = PlanHelper.getLoads(mro.mapPlan);
           
            if (lds != null && lds.size() > 0) {
                for (POLoad ld : lds) {
                    LoadFunc lf = ld.getLoadFunc();
                    // Call setLocation as a hacky way of letting a LoadFunc fix up the Job.
                    // Note that setLocation will get called on the loadFuncs later, as well.
                    // That's ok as setLocation getting called multiple times is documented behavior.
                    if (lf != null) {
                        lf.setLocation(ld.getLFile().getFileName(), nwJob);
                    }
                    //Store the inp filespecs
                    inp.add(ld.getLFile());
                   
                    //Store the target operators for tuples read
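
As the comment above notes, setLocation() may be called several times on the same loader, so implementations should keep it idempotent. A hedged sketch of what that means in practice, reusing the illustrative SketchLoader from earlier (imports as in that sketch):

    @Override
    public void setLocation(String location, Job job) throws IOException {
        // Safe to call repeatedly: setInputPaths() replaces the previous
        // value of mapred.input.dir rather than appending to it.
        FileInputFormat.setInputPaths(job, location);
        // Avoid one-shot side effects here (opening streams, counters);
        // defer those to prepareToRead().
    }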

        // For all other splits, bind to the first key which is greater
        // than or equal to the first key of the map.

        for (int i = 0; i < relationCnt - 1; i++) {

            LoadFunc loadfunc = (LoadFunc)PigContext.instantiateFuncFromSpec(sidFuncSpecs.get(i));
            loadfunc.setUDFContextSignature(loaderSignatures.get(i));
            Job dummyJob = new Job(new Configuration(PigMapReduce.sJobConfInternal.get()));
            loadfunc.setLocation(sideFileSpecs.get(i), dummyJob);
            ((IndexableLoadFunc)loadfunc).initialize(dummyJob.getConfiguration());
            sideLoaders.add(loadfunc);
            Tuple rearranged;

            if (index.get(0).first.equals(curSplitIdx)) {
                // This is the first split: bind at the very first record in all side relations.
                Tuple t = loadfunc.getNext();
                if(null == t)   // This side relation is entirely empty.
                    continue;
                rearranged = applyLRon(t, i+1);
                heap.offer(rearranged);
                continue;
            }
            else {
                // This is not the first split; we need to bind to the key
                // equal to the firstBaseKey or the next key thereafter.

                // First, seek close to the base key.
                ((IndexableLoadFunc)loadfunc).seekNear(firstBaseKey instanceof
                        Tuple ? (Tuple) firstBaseKey : mTupleFactory.newTuple(firstBaseKey));

                // Since the contract of IndexableLoadFunc does not specify
                // exactly where we will land after the seekNear() call, we
                // keep reading from the side loader until the key is
                // actually greater than or equal to the base key.
                while (true) {
                    Tuple t = loadfunc.getNext();
                    if (t == null) // This relation has ended.
                        break;
                    rearranged = applyLRon(t, i+1);
                    if (rearranged.get(1) == null) // A null key here implies
                        continue;                  // we are still behind.
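
The side-loader protocol above comes from the IndexableLoadFunc interface: initialize() with the job conf, seekNear() a key tuple, then read forward until the key comparison succeeds, since seekNear() only guarantees landing somewhere near the target. A hedged sketch of that call sequence; the loader, path, and key are illustrative:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.pig.IndexableLoadFunc;
    import org.apache.pig.LoadFunc;
    import org.apache.pig.data.Tuple;
    import org.apache.pig.data.TupleFactory;

    public class SeekSketch {
        public static void seekTo(LoadFunc loader, Object key) throws Exception {
            Job job = new Job(new Configuration());
            loader.setLocation("/data/sorted", job);   // illustrative path
            IndexableLoadFunc idx = (IndexableLoadFunc) loader;
            idx.initialize(job.getConfiguration());
            idx.seekNear(TupleFactory.getInstance().newTuple(key));
            Tuple t;
            while ((t = loader.getNext()) != null) {
                // Compare t's key field with the target and stop at the
                // first tuple whose key is >= the target; until then we
                // are still behind. (The comparison is omitted here
                // because the key layout is loader-specific.)
            }
        }
    }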

                String errMsg = "Expected physical operator at root to be POLoad. Found : "+phyOp.getClass().getCanonicalName();
                throw new MRCompilerException(errMsg,errCode,PigException.BUG);
            }
           
           
            LoadFunc loadFunc = ((POLoad)phyOp).getLoadFunc();
            try {
                if(!(CollectableLoadFunc.class.isAssignableFrom(loadFunc.getClass()))){
                    int errCode = 2249;
                    throw new MRCompilerException("While using 'collected' on group; data must be loaded via loader implementing CollectableLoadFunc.", errCode);
                }
                ((CollectableLoadFunc)loadFunc).ensureAllKeyInstancesInSameSplit();
            } catch (MRCompilerException e){
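
What the compiler is checking for above is simply that the loader implements the CollectableLoadFunc contract. A minimal sketch of opting in, extending the illustrative SketchLoader from earlier:

    import java.io.IOException;
    import org.apache.pig.CollectableLoadFunc;

    public class CollectedSketchLoader extends SketchLoader
            implements CollectableLoadFunc {
        @Override
        public void ensureAllKeyInstancesInSameSplit() throws IOException {
            // Arrange (or verify) that all records sharing a key land in
            // one split; 'collected' group is only correct under that
            // guarantee.
        }
    }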

                        if (!UriUtil.isHDFSFile(location))
                            continue;
                        Path path = new Path(location);
                        FileSystem fs = path.getFileSystem(conf);
                        if (fs.exists(path)) {
                            LoadFunc loader = (LoadFunc) PigContext
                                    .instantiateFuncFromSpec(
                                            ld.getLFile().getFuncSpec());
                            Job job = new Job(conf);
                            loader.setLocation(location, job);
                            InputFormat inf = loader.getInputFormat();
                            List<InputSplit> splits = inf.getSplits(new JobContext(
                                    job.getConfiguration(), job.getJobID()));
                            List<List<InputSplit>> results = MapRedUtil
                                    .getCombinePigSplits(splits,
                                            fs.getDefaultBlockSize(), conf);
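
The same getSplits() dance can be driven by hand outside a running job, which is what the snippet above does before combining splits. A hedged sketch against the Hadoop 0.20-era JobContext constructor used in these snippets; the path is illustrative:

    import java.util.List;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.InputFormat;
    import org.apache.hadoop.mapreduce.InputSplit;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.JobContext;
    import org.apache.pig.LoadFunc;
    import org.apache.pig.builtin.PigStorage;

    public class SplitSketch {
        public static void main(String[] args) throws Exception {
            LoadFunc loader = new PigStorage();
            Job job = new Job(new Configuration());
            loader.setLocation("/tmp/in.txt", job); // writes mapred.input.dir
            InputFormat inf = loader.getInputFormat();
            List<InputSplit> splits = inf.getSplits(
                    new JobContext(job.getConfiguration(), job.getJobID()));
            System.out.println(splits.size() + " splits");
        }
    }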
