Package org.apache.pig

Examples of org.apache.pig.LoadFunc


                String errMsg = "Expected physical operator at root to be POLoad. Found : "+phyOp.getClass().getCanonicalName();
                throw new MRCompilerException(errMsg,errCode,PigException.BUG);
            }
           
           
            LoadFunc loadFunc = ((POLoad)phyOp).getLoadFunc();
            try {
                if(!(CollectableLoadFunc.class.isAssignableFrom(loadFunc.getClass()))){
                    int errCode = 2249;
                    throw new MRCompilerException("While using 'collected' on group; data must be loaded via loader implementing CollectableLoadFunc.", errCode);
                }
                ((CollectableLoadFunc)loadFunc).ensureAllKeyInstancesInSameSplit();
            } catch (MRCompilerException e){
View Full Code Here


                        if(!UriUtil.isHDFSFile(location))
                            continue;
                        Path path = new Path(location);
                        FileSystem fs = path.getFileSystem(conf);
                        if (fs.exists(path)) {
                            LoadFunc loader = (LoadFunc) PigContext
                            .instantiateFuncFromSpec(ld.getLFile()
                                    .getFuncSpec());
                            Job job = new Job(conf);
                            loader.setLocation(location, job);
                            InputFormat inf = loader.getInputFormat();
                            List<InputSplit> splits = inf.getSplits(new JobContext(
                                    job.getConfiguration(), job.getJobID()));
                            List<List<InputSplit>> results = MapRedUtil
                            .getCombinePigSplits(splits, fs
                                    .getDefaultBlockSize(), conf);
View Full Code Here

                }
               
                POLoad sideLoader = (POLoad)rootPOOp;
                FileSpec loadFileSpec = sideLoader.getLFile();
                FuncSpec funcSpec = loadFileSpec.getFuncSpec();
                LoadFunc loadfunc = sideLoader.getLoadFunc();
                if(i == 0){
                   
                    if(!(CollectableLoadFunc.class.isAssignableFrom(loadfunc.getClass()))){
                      int errCode = 2252;
                        throw new MRCompilerException("Base loader in Cogroup must implement CollectableLoadFunc.", errCode);
                    }
                   
                    ((CollectableLoadFunc)loadfunc).ensureAllKeyInstancesInSameSplit();
                    continue;
                }
                if(!(IndexableLoadFunc.class.isAssignableFrom(loadfunc.getClass()))){
                    int errCode = 2253;
                    throw new MRCompilerException("Side loaders in cogroup must implement IndexableLoadFunc.", errCode);
                }
               
                funcSpecs.add(funcSpec);
View Full Code Here

        // First replace loader with  MergeJoinIndexer.
        PhysicalPlan baseMapPlan = baseMROp.mapPlan;
        POLoad baseLoader = (POLoad)baseMapPlan.getRoots().get(0);                           
        FileSpec origLoaderFileSpec = baseLoader.getLFile();
        FuncSpec funcSpec = origLoaderFileSpec.getFuncSpec();
        LoadFunc loadFunc = baseLoader.getLoadFunc();
       
        if (! (OrderedLoadFunc.class.isAssignableFrom(loadFunc.getClass()))){
            int errCode = 1104;
            String errMsg = "Base relation of merge-coGroup must implement " +
            "OrderedLoadFunc interface. The specified loader "
            + funcSpec + " doesn't implement it";
            throw new MRCompilerException(errMsg,errCode);
View Full Code Here

            rightMROpr.requestedParallelism = 1; // we need exactly one reducer for indexing job.       
           
            // At this point, we must be operating on map plan of right input and it would contain nothing else other then a POLoad.
            POLoad rightLoader = (POLoad)rightMROpr.mapPlan.getRoots().get(0);
            joinOp.setSignature(rightLoader.getSignature());
            LoadFunc rightLoadFunc = rightLoader.getLoadFunc();
            if(IndexableLoadFunc.class.isAssignableFrom(rightLoadFunc.getClass())) {
                joinOp.setRightLoaderFuncSpec(rightLoader.getLFile().getFuncSpec());
                joinOp.setRightInputFileName(rightLoader.getLFile().getFileName());
               
                // we don't need the right MROper since
                // the right loader is an IndexableLoadFunc which can handle the index
                // itself
                MRPlan.remove(rightMROpr);
                if(rightMROpr == compiledInputs[0]) {
                    compiledInputs[0] = null;
                } else if(rightMROpr == compiledInputs[1]) {
                    compiledInputs[1] = null;
                }
                rightMROpr = null;
               
                // validate that the join keys in merge join are only                                                                                                                                                                             
                // simple column projections or '*' and not expression - expressions                                                                                                                                                              
                // cannot be handled when the index is built by the storage layer on the sorted                                                                                                                                                   
                // data when the sorted data (and corresponding index) is written.                                                                                                                                                                
                // So merge join will be restricted not have expressions as                                                                                                                                                                       
                // join keys     
                int numInputs = mPlan.getPredecessors(joinOp).size(); // should be 2
                for(int i = 0; i < numInputs; i++) {
                    List<PhysicalPlan> keyPlans = joinOp.getInnerPlansOf(i);
                    for (PhysicalPlan keyPlan : keyPlans) {
                        for(PhysicalOperator op : keyPlan) {
                            if(!(op instanceof POProject)) {
                                int errCode = 1106;
                                String errMsg = "Merge join is possible only for simple column or '*' join keys when using " +
                                rightLoader.getLFile().getFuncSpec() + " as the loader";
                                throw new MRCompilerException(errMsg, errCode, PigException.INPUT);
                            }
                        }
                    }
                }
            } else {
               
                // Replace POLoad with  indexer.

                LoadFunc loadFunc = rightLoader.getLoadFunc();
                if (! (OrderedLoadFunc.class.isAssignableFrom(loadFunc.getClass()))){
                    int errCode = 1104;
                    String errMsg = "Right input of merge-join must implement " +
                    "OrderedLoadFunc interface. The specified loader "
                    + loadFunc + " doesn't implement it";
                    throw new MRCompilerException(errMsg,errCode);
View Full Code Here

        // here and set it to the configuration object. This number is needed
        // by PoissonSampleLoader to compute the number of samples
        int n = pigSplit.getTotalSplits();
        context.getConfiguration().setInt("pig.mapsplits.count", n);
        Configuration conf = context.getConfiguration();
        LoadFunc loadFunc = getLoadFunc(pigSplit.getInputIndex(), conf);
        // Pass loader signature to LoadFunc and to InputFormat through
        // the conf
        passLoadSignature(loadFunc, pigSplit.getInputIndex(), conf);
       
        // merge entries from split specific conf into the conf we got
        PigInputFormat.mergeSplitSpecificConf(loadFunc, pigSplit, conf);
       
        // for backward compatibility
        PigInputFormat.sJob = conf;
       
        InputFormat inputFormat = loadFunc.getInputFormat();
       
        return new PigRecordReader(inputFormat, pigSplit, loadFunc, context);
    }
View Full Code Here

                // input location into the configuration (for example,
                // FileInputFormat stores this in mapred.input.dir in the conf),
                // then for different inputs, the loader's don't end up
                // over-writing the same conf.
                FuncSpec loadFuncSpec = inputs.get(i).getFuncSpec();
                LoadFunc loadFunc = (LoadFunc) PigContext.instantiateFuncFromSpec(
                        loadFuncSpec);
                boolean combinable = !(loadFunc instanceof MergeJoinIndexer) &&
                !(IndexableLoadFunc.class.isAssignableFrom(loadFunc.getClass())) &&
                !(CollectableLoadFunc.class.isAssignableFrom(loadFunc.getClass()) &&
                    OrderedLoadFunc.class.isAssignableFrom(loadFunc.getClass()));
                if (combinable)
                    combinable = !conf.getBoolean("pig.noSplitCombination", false);
                Configuration confClone = new Configuration(conf);
                Job inputSpecificJob = new Job(confClone);
                // Pass loader signature to LoadFunc and to InputFormat through
                // the conf
                passLoadSignature(loadFunc, i, inputSpecificJob.getConfiguration());
                loadFunc.setLocation(inputs.get(i).getFileName(),
                        inputSpecificJob);
                // The above setLocation call could write to the conf within
                // the inputSpecificJob - use this updated conf
               
                // get the InputFormat from it and ask for splits
                InputFormat inpFormat = loadFunc.getInputFormat();
                List<InputSplit> oneInputSplits = inpFormat.getSplits(
                        new JobContext(inputSpecificJob.getConfiguration(),
                                jobcontext.getJobID()));
                List<InputSplit> oneInputPigSplits = getPigSplits(
                        oneInputSplits, i, inpTargets.get(i), fs.getDefaultBlockSize(), combinable, confClone);
View Full Code Here

      t.append(r.nextInt());
      bag.add(t);
    }
   
    POCast op = new POCast(new OperatorKey("", r.nextLong()), -1);
    LoadFunc load = new TestLoader();
    op.setFuncSpec(new FuncSpec(load.getClass().getName()));
    POProject prj = new POProject(new OperatorKey("", r.nextLong()), -1, 0);
    PhysicalPlan plan = new PhysicalPlan();
    plan.add(prj);
    plan.add(op);
    plan.connect(prj, op);
View Full Code Here

      t.append(r.nextLong());
      bag.add(t);
    }
   
    POCast op = new POCast(new OperatorKey("", r.nextLong()), -1);
    LoadFunc load = new TestLoader();
    op.setFuncSpec(new FuncSpec(load.getClass().getName()));
    POProject prj = new POProject(new OperatorKey("", r.nextLong()), -1, 0);
    PhysicalPlan plan = new PhysicalPlan();
    plan.add(prj);
    plan.add(op);
    plan.connect(prj, op);
View Full Code Here

      t.append(r.nextFloat());
      bag.add(t);
    }
   
    POCast op = new POCast(new OperatorKey("", r.nextLong()), -1);
    LoadFunc load = new TestLoader();
    op.setFuncSpec(new FuncSpec(load.getClass().getName()));
    POProject prj = new POProject(new OperatorKey("", r.nextLong()), -1, 0);
    PhysicalPlan plan = new PhysicalPlan();
    plan.add(prj);
    plan.add(op);
    plan.connect(prj, op);
View Full Code Here

TOP

Related Classes of org.apache.pig.LoadFunc

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.