Package org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators

Examples of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore
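
POStore is the physical operator that writes a plan's output to a storage location. The snippets below, drawn from the MRCompiler, JobControlCompiler, plan optimizer, and logical-to-physical translator, show how these operators are created, configured, and later stripped while MapReduce jobs are assembled. As a minimal sketch of the construction pattern the snippets share (the scope string, node-id generator nig, physical plan, output path, and storage function here are illustrative placeholders, not taken from this page):

      // A minimal sketch of the common POStore setup; all names are placeholders.
      POStore store = new POStore(new OperatorKey(scope, nig.getNextNodeId(scope)));
      store.setSFile(new FileSpec("/tmp/output",                  // hypothetical path
              new FuncSpec(PigStorage.class.getName())));         // hypothetical storer
      store.setIsTmpStore(true);  // compiler-introduced; the optimizer may remove it
      plan.addAsLeaf(store);      // a store is always a leaf of its physical plan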


      // change the plan to store the first join input into a temp file
      FileSpec fSpec = getTempFileSpec();
      MapReduceOper mro = compiledInputs[0];
      POStore str = getStore();
      str.setSFile(fSpec);
      if (!mro.isMapDone()) {
        mro.mapPlan.addAsLeaf(str);
        mro.setMapDoneSingle(true);
      } else if (mro.isMapDone() && !mro.isReduceDone()) {
        mro.reducePlan.addAsLeaf(str);
        mro.setReduceDone(true);
      } else {
        int errCode = 2022;
        String msg = "Both map and reduce phases have been done. This is unexpected while compiling.";
        throw new PlanException(msg, errCode, PigException.BUG);
      }
     
      FileSpec partitionFile = getTempFileSpec();
      int rp = op.getRequestedParallelism();
     
      Pair<MapReduceOper, Integer> sampleJobPair = getSkewedJoinSampleJob(op, mro, fSpec, partitionFile, rp);           
      rp = sampleJobPair.second;
     
      // Set the parallelism of the skewed join to the value calculated by the
      // sampling job: if PARALLEL is specified in the join statement, "rp" is
      // already that number; otherwise use the value the sampling job
      // calculated based on the default parallelism.
      op.setRequestedParallelism(rp);
           
      // load the temp file for the first table as the input of the join
      MapReduceOper[] joinInputs = new MapReduceOper[] {startNew(fSpec, sampleJobPair.first), compiledInputs[1]};           
      MapReduceOper[] rearrangeOutputs = new MapReduceOper[2];                      
     
      compiledInputs = new MapReduceOper[] {joinInputs[0]};
      // run POLocalRearrange for first join table
      POLocalRearrange lr = new POLocalRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)), rp);           
      try {
        lr.setIndex(0);               
      } catch (ExecException e) {
        int errCode = 2058;
        String msg = "Unable to set index on newly created POLocalRearrange.";
        throw new PlanException(msg, errCode, PigException.BUG, e);
      }
     
      List<PhysicalOperator> l = plan.getPredecessors(op);
      MultiMap<PhysicalOperator, PhysicalPlan> joinPlans = op.getJoinPlans();
      List<PhysicalPlan> groups = (List<PhysicalPlan>)joinPlans.get(l.get(0));
      // check the type of the group keys; if there is more than one field, the key is a TUPLE.
      byte type = DataType.TUPLE;
      if (groups.size() == 1) {
        type = groups.get(0).getLeaves().get(0).getResultType();               
      }              
     
      lr.setKeyType(type);           
      lr.setPlans(groups);
      lr.setResultType(DataType.TUPLE);
     
      lr.visit(this);
      if(lr.getRequestedParallelism() > curMROp.requestedParallelism)
        curMROp.requestedParallelism = lr.getRequestedParallelism();
      rearrangeOutputs[0] = curMROp;
     
      compiledInputs = new MapReduceOper[] {joinInputs[1]};      
      // if the map for current input is already closed, then start a new job
      if (compiledInputs[0].isMapDone() && !compiledInputs[0].isReduceDone()) {
        FileSpec f = getTempFileSpec();
        POStore s = getStore();
        s.setSFile(f);
        compiledInputs[0].reducePlan.addAsLeaf(s);
        compiledInputs[0].setReduceDone(true);
        compiledInputs[0] = startNew(f, compiledInputs[0]);
      }              
     
View Full Code Here
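
This excerpt is from the skewed-join compilation in the MRCompiler: whichever phase of the current job is still open is sealed with a temporary POStore (added as a leaf of the map or reduce plan), so that the sampling job and the join job can read the first input back from the temp file. A job whose map and reduce phases are both already done at this point is a compiler bug, reported as error 2022.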


        POForEach nfe3 = new POForEach(new OperatorKey(scope,nig.getNextNodeId(scope)), -1, ep4s, flattened3);
       
        mro.reducePlan.add(nfe3);
        mro.reducePlan.connect(nfe2, nfe3);
       
        POStore str = getStore();
        str.setSFile(sampleFile);
       
        mro.reducePlan.add(str);
        mro.reducePlan.connect(nfe3, str);
       
        mro.setReduceDone(true);
View Full Code Here
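
Here the tail of a sampling job is assembled: the final POForEach is connected to a tmp POStore that writes the sample result to sampleFile, and the reduce phase is then marked done.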

            rem.split.removePlan(rem.storePlan);

            // Collapse split if only one nested plan remains.
            if (rem.split.getPlans().size() == 1) {
                PhysicalPlan plan = rem.split.getPlans().get(0);
                POStore store = (POStore)plan.getRoots().get(0);
                plan.remove(store);
                store.setInputs(rem.split.getInputs());
                rem.plan.replace(rem.split, store);
            }
        } catch(PlanException pe) {
            log.info("failed to remove unnecessary store from plan: "+pe.getMessage());
        }
View Full Code Here
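
This optimizer fragment removes a no-longer-needed store plan from a POSplit. If only one nested plan remains afterwards, the split is collapsed: the plan's POStore is detached, wired to the split's inputs, and spliced into the enclosing plan in place of the split.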

            for (PhysicalPlan plan: split.getPlans()) {
                if (plan.size() == 1) {
                    PhysicalOperator op = plan.getRoots().get(0);
                    if (op instanceof POStore) {
                        POStore store = (POStore)op;

                        if (store.isTmpStore()) {
                            // A tmp store is one introduced by the
                            // MRCompiler; the user didn't ask for
                            // it. There can be at most one per
                            // split (though there can be nested
                            // splits).
                            tmpStore = plan;
                            sFile = store.getSFile();
                        } else if (store.getInputSpec() != null) {
                            // We set the input spec on store
                            // operators that had a corresponding
                            // load which was eliminated in the
                            // PigServer. There can be multiple of
                            // those, but they are all reversible,
                            // so any of them will do.
                            lFile = store.getInputSpec();
                        }
                    }
                }
            }
View Full Code Here
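
When scanning the single-operator nested plans of a split, a POStore is either a compiler-introduced tmp store (at most one per split) or a user store whose input spec was recorded when its matching load was eliminated in the PigServer; the two cases are distinguished with isTmpStore() and getInputSpec().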

                    fixProjectionAfterLimit(limitAdjustMROp, mr);
                    limitAdjustMROp.setLimitAfterSort(true);
                    limitAdjustMROp.setSortOrder(mr.getSortOrder());
                }
               
                POStore st = getStore();
                st.setSFile(oldSpec);
                st.setIsTmpStore(oldIsTmpStore);
                limitAdjustMROp.reducePlan.addAsLeaf(st);
                limitAdjustMROp.requestedParallelism = 1;
                limitAdjustMROp.setLimitOnly(true);
               
                List<MapReduceOper> successorList = MRPlan.getSuccessors(mr);
View Full Code Here
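
The limit-adjust job built above runs with exactly one reducer and does nothing but apply the LIMIT (setLimitOnly(true)); its POStore is pointed back at the original output spec and tmp-store flag, so the extra job stays invisible to the user.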

            nwJob.setOutputFormatClass(PigOutputFormat.class);
           
            if (mapStores.size() + reduceStores.size() == 1) { // single store case
                log.info("Setting up single store job");
               
                POStore st;
                if (reduceStores.isEmpty()) {
                    st = mapStores.get(0);
                    mro.mapPlan.remove(st);
                }
                else {
                    st = reduceStores.get(0);
                    mro.reducePlan.remove(st);
                }

                // set out filespecs
                String outputPath = st.getSFile().getFileName();
                FuncSpec outputFuncSpec = st.getSFile().getFuncSpec();
               
                conf.set("pig.streaming.log.dir",
                            new Path(outputPath, LOG_DIR).toString());
                conf.set("pig.streaming.task.output.dir", outputPath);
            }
            else { // multi store case
                log.info("Setting up multi store job");
                String tmpLocationStr = FileLocalizer
                        .getTemporaryPath(pigContext).toString();
                tmpLocation = new Path(tmpLocationStr);

                nwJob.setOutputFormatClass(PigOutputFormat.class);
               
                for (POStore sto: storeLocations) {
                    sto.setMultiStore(true);
                }
                conf.set("pig.streaming.log.dir",
                            new Path(tmpLocation, LOG_DIR).toString());
                conf.set("pig.streaming.task.output.dir", tmpLocation.toString());
            }

            // store map key type
            // this is needed when the key is null to create
            // an appropriate NullableXXXWritable object
            conf.set("pig.map.keytype", ObjectSerializer.serialize(new byte[] { mro.mapKeyType }));

            // set parent plan in all operators in map and reduce plans
            // currently the parent plan is really used only when POStream is present in the plan
            new PhyPlanSetter(mro.mapPlan).visit();
            new PhyPlanSetter(mro.reducePlan).visit();
           
            // this call modifies the ReplFiles names of POFRJoin operators
            // within the MR plans, must be called before the plans are
            // serialized
            setupDistributedCacheForJoin(mro, pigContext, conf);

            POPackage pack = null;
            if(mro.reducePlan.isEmpty()){
                //MapOnly Job
                nwJob.setMapperClass(PigMapOnly.Map.class);
                nwJob.setNumReduceTasks(0);
                conf.set("pig.mapPlan", ObjectSerializer.serialize(mro.mapPlan));
                if(mro.isEndOfAllInputSetInMap()) {
                    // this is used in Map.close() to decide whether the
                    // pipeline needs to be rerun one more time.
                    // The pipeline is rerun if there was either a stream or a POMergeJoin.
                    conf.set(END_OF_INP_IN_MAP, "true");
                }
            }
            else{
                //Map Reduce Job
                //Process the POPackage operator and remove it from the reduce plan
                if(!mro.combinePlan.isEmpty()){
                    POPackage combPack = (POPackage)mro.combinePlan.getRoots().get(0);
                    mro.combinePlan.remove(combPack);
                    nwJob.setCombinerClass(PigCombiner.Combine.class);
                    conf.set("pig.combinePlan", ObjectSerializer.serialize(mro.combinePlan));
                    conf.set("pig.combine.package", ObjectSerializer.serialize(combPack));
                } else if (mro.needsDistinctCombiner()) {
                    nwJob.setCombinerClass(DistinctCombiner.Combine.class);
                    log.info("Setting identity combiner class.");
                }
                pack = (POPackage)mro.reducePlan.getRoots().get(0);
                mro.reducePlan.remove(pack);
                nwJob.setMapperClass(PigMapReduce.Map.class);
                nwJob.setReducerClass(PigMapReduce.Reduce.class);
               
                // First check PARALLEL in the query, then the defaultParallel
                // in PigContext, and finally fall back to estimation.
                if (mro.requestedParallelism > 0)
                    nwJob.setNumReduceTasks(mro.requestedParallelism);
                else if (pigContext.defaultParallel > 0)
                    conf.set("mapred.reduce.tasks", ""+pigContext.defaultParallel);
                else
                    estimateNumberOfReducers(conf, lds);
               
                if (mro.customPartitioner != null)
                  nwJob.setPartitionerClass(PigContext.resolveClassName(mro.customPartitioner));

                conf.set("pig.mapPlan", ObjectSerializer.serialize(mro.mapPlan));
                if(mro.isEndOfAllInputSetInMap()) {
                    // this is used in Map.close() to decide whether the
                    // pipeline needs to be rerun one more time.
                    // The pipeline is rerun only if there was a stream or merge join.
                    conf.set(END_OF_INP_IN_MAP, "true");
                }
                conf.set("pig.reducePlan", ObjectSerializer.serialize(mro.reducePlan));
                if(mro.isEndOfAllInputSetInReduce()) {
                    // this is used in Reduce.close() to decide whether the
                    // pipeline needs to be rerun one more time.
                    // The pipeline is rerun only if there was a stream.
                    conf.set("pig.stream.in.reduce", "true");
                }
                conf.set("pig.reduce.package", ObjectSerializer.serialize(pack));
                conf.set("pig.reduce.key.type", Byte.toString(pack.getKeyType()));
               
                if (mro.getUseSecondaryKey()) {
                    nwJob.setGroupingComparatorClass(PigSecondaryKeyGroupComparator.class);
                    nwJob.setPartitionerClass(SecondaryKeyPartitioner.class);
                    nwJob.setSortComparatorClass(PigSecondaryKeyComparator.class);
                    nwJob.setOutputKeyClass(NullableTuple.class);
                    conf.set("pig.secondarySortOrder",
                            ObjectSerializer.serialize(mro.getSecondarySortOrder()));

                }
                else
                {
                    Class<? extends WritableComparable> keyClass = HDataType.getWritableComparableTypes(pack.getKeyType()).getClass();
                    nwJob.setOutputKeyClass(keyClass);
                    selectComparator(mro, pack.getKeyType(), nwJob);
                }
                nwJob.setOutputValueClass(NullableTuple.class);
            }
       
            if(mro.isGlobalSort() || mro.isLimitAfterSort()){
                // Only set the quantiles file and sort partitioner if we're a
                // global sort, not for limit after sort.
                if (mro.isGlobalSort()) {
                    String symlink = addSingleFileToDistributedCache(
                            pigContext, conf, mro.getQuantFile(), "pigsample");
                    conf.set("pig.quantilesFile", symlink);
                    nwJob.setPartitionerClass(WeightedRangePartitioner.class);
                }
               
                if (mro.isUDFComparatorUsed) { 
                    boolean usercomparator = false;
                    for (String compFuncSpec : mro.UDFs) {
                        Class comparator = PigContext.resolveClassName(compFuncSpec);
                        if(ComparisonFunc.class.isAssignableFrom(comparator)) {
                            nwJob.setMapperClass(PigMapReduce.MapWithComparator.class);
                            nwJob.setReducerClass(PigMapReduce.ReduceWithComparator.class);
                            conf.set("pig.reduce.package", ObjectSerializer.serialize(pack));
                            conf.set("pig.usercomparator", "true");
                            nwJob.setOutputKeyClass(NullableTuple.class);
                            nwJob.setSortComparatorClass(comparator);
                            usercomparator = true;
                            break;
                        }
                    }
                    if (!usercomparator) {
                        String msg = "Internal error. Can't find the UDF comparator";
                        throw new IOException (msg);
                    }
                   
                } else {
                    conf.set("pig.sortOrder",
                        ObjectSerializer.serialize(mro.getSortOrder()));
                }
            }
           
            if (mro.isSkewedJoin()) {
                String symlink = addSingleFileToDistributedCache(pigContext,
                        conf, mro.getSkewedJoinPartitionFile(), "pigdistkey");
                conf.set("pig.keyDistFile", symlink);
                nwJob.setPartitionerClass(SkewedPartitioner.class);
                nwJob.setMapperClass(PigMapReduce.MapWithPartitionIndex.class);
                nwJob.setMapOutputKeyClass(NullablePartitionWritable.class);
                nwJob.setGroupingComparatorClass(PigGroupingPartitionWritableComparator.class);
            }
           
            // Unset the inputs and parent plan of each POStore; otherwise the
            // map/reduce plans would be unnecessarily deserialized along with them.
            for (POStore st: mapStores) { st.setInputs(null); st.setParentPlan(null);}
            for (POStore st: reduceStores) { st.setInputs(null); st.setParentPlan(null);}

            // tmp file compression setups
            if (Utils.tmpFileCompression(pigContext)) {
                conf.setBoolean("pig.tmpfilecompression", true);
                conf.set("pig.tmpfilecompression.codec", Utils.tmpFileCompressionCodec(pigContext));
View Full Code Here
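
This long excerpt is from the JobControlCompiler, which wires a compiled MapReduceOper into a Hadoop job: the single POStore is removed from the plan before serialization (or every store is flagged as multi-store), the map, combine, and reduce plans plus the POPackage are serialized into the job configuration, and the reducer count, partitioner, and comparator classes are chosen. On the task side the same configuration keys are read back; a minimal sketch of that counterpart, assuming the standard ObjectSerializer round trip (the placement of this code is assumed, not shown on this page):

            // Hypothetical task-side counterpart of the conf.set(...) calls above.
            PhysicalPlan mapPlan = (PhysicalPlan) ObjectSerializer
                    .deserialize(conf.get("pig.mapPlan"));
            POPackage pack = (POPackage) ObjectSerializer
                    .deserialize(conf.get("pig.reduce.package"));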

            LOG.warn("unable to get stores of the job");
            return;
        }
       
        if (mapStores.size() + reduceStores.size() == 1) {
            POStore sto = (mapStores.size() > 0) ? mapStores.get(0)
                    : reduceStores.get(0);
            if (!sto.isTmpStore() || state != JobState.SUCCESS) {
                long records = (mapStores.size() > 0) ? mapOutputRecords
                        : reduceOutputRecords;          
                OutputStats ds = new OutputStats(sto.getSFile().getFileName(),
                        hdfsBytesWritten, records, (state == JobState.SUCCESS));
                ds.setPOStore(sto);
                ds.setConf(conf);
                outputs.add(ds);
               
                if (state == JobState.SUCCESS) {
                    ScriptState.get().emitOutputCompletedNotification(ds);
                }
            }
        } else {
            for (POStore sto : mapStores) {
                if (sto.isTmpStore() && state == JobState.SUCCESS) continue;
                addOneOutputStats(sto);
            }
            for (POStore sto : reduceStores) {
                if (sto.isTmpStore() && state == JobState.SUCCESS) continue;
                addOneOutputStats(sto);
            }    
        }
    }
View Full Code Here
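
When job statistics are collected, tmp stores of successful jobs are skipped: in the single-store case no OutputStats entry is created for a successful tmp store, and in the multi-store case each tmp store of a successful job is filtered out before addOneOutputStats is called.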

   
    @Override
    public void visit(LOStore loStore) throws FrontendException {
        String scope = DEFAULT_SCOPE;
//        System.err.println("Entering Store");
        POStore store = new POStore(new OperatorKey(scope, nodeGen
                .getNextNodeId(scope)));
        store.setAlias(((LogicalRelationalOperator)loStore.getPlan().
                getPredecessors(loStore).get(0)).getAlias());
        store.setSFile(loStore.getOutputSpec());
        store.setInputSpec(loStore.getInputSpec());
        store.setSignature(loStore.getSignature());
        store.setSortInfo(loStore.getSortInfo());
        store.setIsTmpStore(loStore.isTmpStore());
       
        store.setSchema(Util.translateSchema( loStore.getSchema() ));

        currentPlan.add(store);
       
        List<Operator> op = loStore.getPlan().getPredecessors(loStore);
        PhysicalOperator from = null;
View Full Code Here
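
This is the logical-to-physical translation of LOStore: a fresh POStore is created under a new OperatorKey, and the alias, output spec, input spec, signature, sort info, tmp-store flag, and schema are copied over from the logical operator before the store is added to the current physical plan.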

                        FileSpec newSpec = getTempFileSpec();
                       
                        // replace oldSpec in mro with newSpec
                        new FindStoreNameVisitor(pl, newSpec, oldSpec).visit();
                       
                        POStore newSto = getStore();
                        newSto.setSFile(oldSpec);
                        if (MRPlan.getPredecessors(mrOp)!=null &&
                                MRPlan.getPredecessors(mrOp).contains(mro))
                            MRPlan.disconnect(mro, mrOp);
                        MapReduceOper catMROp = getConcatenateJob(newSpec, mro, newSto);
                        MRPlan.connect(catMROp, mrOp);  
View Full Code Here
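
Here the optimizer reroutes a store through a concatenation job: a FindStoreNameVisitor renames the plan's store target from oldSpec to a new temp file, a new POStore aimed at oldSpec is created, and a concatenate job is inserted between the producing and consuming MapReduce operators.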

        ld.setPc(pigContext);
        return ld;
    }
   
    private POStore getStore(){
        POStore st = new POStore(new OperatorKey(scope,nig.getNextNodeId(scope)));
        // Mark the store as a tmp store. These can be removed by the
        // optimizer because the user didn't request them.
        st.setIsTmpStore(true);
        return st;
    }
View Full Code Here
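
getStore() is the factory behind the tmp stores used throughout the MRCompiler snippets above: every store it returns carries a fresh OperatorKey and is marked with setIsTmpStore(true), which is exactly the flag the optimizer and the statistics code test for.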
