Package org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators

Examples of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLocalRearrange


                String msg = "Internal error. Could not compute key type of sort operator.";
                throw new PlanException(msg, errCode, PigException.BUG, ve);
            }
        }
       
        POLocalRearrange lr = new POLocalRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)));
        try {
            lr.setIndex(0);
        } catch (ExecException e) {
          int errCode = 2058;
          String msg = "Unable to set index on newly created POLocalRearrange.";
            throw new PlanException(msg, errCode, PigException.BUG, e);
        }
        lr.setKeyType((fields == null || fields.length>1) ? DataType.TUPLE :
            keyType);
        lr.setPlans(eps1);
        lr.setResultType(DataType.TUPLE);
        lr.setAlias(sort.getAlias());
        mro.mapPlan.addAsLeaf(lr);
       
        mro.setMapDone(true);
       
        if (limit!=-1) {
          POPackageLite pkg_c = new POPackageLite(new OperatorKey(scope,nig.getNextNodeId(scope)));
          pkg_c.setKeyType((fields.length>1) ? DataType.TUPLE : keyType);
            pkg_c.setNumInps(1);
            //pkg.setResultType(DataType.TUPLE);           
            mro.combinePlan.add(pkg_c);
         
            List<PhysicalPlan> eps_c1 = new ArrayList<PhysicalPlan>();
            List<Boolean> flat_c1 = new ArrayList<Boolean>();
            PhysicalPlan ep_c1 = new PhysicalPlan();
            POProject prj_c1 = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
            prj_c1.setColumn(1);
            prj_c1.setOverloaded(false);
            prj_c1.setResultType(DataType.BAG);
            ep_c1.add(prj_c1);
            eps_c1.add(ep_c1);
            flat_c1.add(true);
            POForEach fe_c1 = new POForEach(new OperatorKey(scope,nig.getNextNodeId(scope)),
                -1, eps_c1, flat_c1);
            fe_c1.setResultType(DataType.TUPLE);
            mro.combinePlan.addAsLeaf(fe_c1);
           
            POLimit pLimit = new POLimit(new OperatorKey(scope,nig.getNextNodeId(scope)));
          pLimit.setLimit(limit);
          mro.combinePlan.addAsLeaf(pLimit);
           
            List<PhysicalPlan> eps_c2 = new ArrayList<PhysicalPlan>();
            eps_c2.addAll(sort.getSortPlans());
       
          POLocalRearrange lr_c2 = new POLocalRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)));
          try {
                lr_c2.setIndex(0);
            } catch (ExecException e) {
              int errCode = 2058;
              String msg = "Unable to set index on newly created POLocalRearrange.";             
                throw new PlanException(msg, errCode, PigException.BUG, e);
            }
          lr_c2.setKeyType((fields.length>1) ? DataType.TUPLE : keyType);
          lr_c2.setPlans(eps_c2);
          lr_c2.setResultType(DataType.TUPLE);
          mro.combinePlan.addAsLeaf(lr_c2);
        }
       
        POPackageLite pkg = new POPackageLite(new OperatorKey(scope,nig.getNextNodeId(scope)));
        pkg.setKeyType((fields == null || fields.length>1) ? DataType.TUPLE :
View Full Code Here


        ep1.add(ce);
       
        List<PhysicalPlan> eps = new ArrayList<PhysicalPlan>();
        eps.add(ep1);
       
        POLocalRearrange lr = new POLocalRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)));
        try {
            lr.setIndex(0);
        } catch (ExecException e) {
          int errCode = 2058;
          String msg = "Unable to set index on newly created POLocalRearrange.";
            throw new PlanException(msg, errCode, PigException.BUG, e);
        }
        lr.setKeyType(DataType.CHARARRAY);
        lr.setPlans(eps);
        lr.setResultType(DataType.TUPLE);
        lr.setAlias(sort.getAlias());
        mro.mapPlan.add(lr);
        mro.mapPlan.connect(nfe1, lr);
       
        mro.setMapDone(true);
       
View Full Code Here

        }
    }
   
    private void connectMapToReduceLimitedSort(MapReduceOper mro, MapReduceOper sortMROp) throws PlanException, VisitorException
    {
        POLocalRearrange slr = (POLocalRearrange)sortMROp.mapPlan.getLeaves().get(0);
       
        POLocalRearrange lr = null;
        try {
            lr = slr.clone();
        } catch (CloneNotSupportedException e) {
            int errCode = 2147;
            String msg = "Error cloning POLocalRearrange for limit after sort";
View Full Code Here

        ep.add(prjStar);
       
        List<PhysicalPlan> eps = new ArrayList<PhysicalPlan>();
        eps.add(ep);
       
        POLocalRearrange lr = new POLocalRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)));
        try {
            lr.setIndex(0);
        } catch (ExecException e) {
            int errCode = 2058;
            String msg = "Unable to set index on the newly created POLocalRearrange.";
            throw new PlanException(msg, errCode, PigException.BUG, e);
        }
        lr.setKeyType(DataType.TUPLE);
        lr.setPlans(eps);
        lr.setResultType(DataType.TUPLE);
       
        mro.mapPlan.addAsLeaf(lr);
       
        POPackage pkg = new POPackage(new OperatorKey(scope,nig.getNextNodeId(scope)));
        pkg.setKeyType(DataType.TUPLE);
View Full Code Here

            ep.add(prjStar);
           
            List<PhysicalPlan> eps = new ArrayList<PhysicalPlan>();
            eps.add(ep);
           
            POLocalRearrange lr = new POLocalRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)));
            lr.setIndex(0);
            lr.setKeyType(DataType.TUPLE);
            lr.setPlans(eps);
            lr.setResultType(DataType.TUPLE);
            lr.setDistinct(true);
           
            addToMap(lr);
           
            blocking(op);
           
View Full Code Here

      MapReduceOper[] joinInputs = new MapReduceOper[] {startNew(fSpec, sampleJobPair.first), compiledInputs[1]};           
      MapReduceOper[] rearrangeOutputs = new MapReduceOper[2];                      
     
      compiledInputs = new MapReduceOper[] {joinInputs[0]};
      // run POLocalRearrange for first join table
      POLocalRearrange lr = new POLocalRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)), rp);           
      try {
        lr.setIndex(0);               
      } catch (ExecException e) {
        int errCode = 2058;
        String msg = "Unable to set index on newly created POLocalRearrange.";
        throw new PlanException(msg, errCode, PigException.BUG, e);
      }
     
      List<PhysicalOperator> l = plan.getPredecessors(op);
      MultiMap<PhysicalOperator, PhysicalPlan> joinPlans = op.getJoinPlans();
      List<PhysicalPlan> groups = joinPlans.get(l.get(0));
      // check the type of group keys, if there are more than one field, the key is TUPLE.
      byte type = DataType.TUPLE;
      if (groups.size() == 1) {
        type = groups.get(0).getLeaves().get(0).getResultType();               
      }              
     
      lr.setKeyType(type);           
      lr.setPlans(groups);
      lr.setResultType(DataType.TUPLE);
     
      lr.visit(this);
      if(lr.getRequestedParallelism() > curMROp.requestedParallelism)
        curMROp.requestedParallelism = lr.getRequestedParallelism();
      rearrangeOutputs[0] = curMROp;
     
      compiledInputs = new MapReduceOper[] {joinInputs[1]};      
      // if the map for current input is already closed, then start a new job
      if (compiledInputs[0].isMapDone() && !compiledInputs[0].isReduceDone()) {
        FileSpec f = getTempFileSpec();
        POStore s = getStore();
        s.setSFile(f);
        compiledInputs[0].reducePlan.addAsLeaf(s);
        compiledInputs[0].setReduceDone(true);
        compiledInputs[0] = startNew(f, compiledInputs[0]);
      }              
     
      // run POPartitionRearrange for second join table
      POPartitionRearrange pr =
          new POPartitionRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)), rp);
      pr.setPigContext(pigContext);
      lr = pr;
      try {
        lr.setIndex(1);
      } catch (ExecException e) {
        int errCode = 2058;
        String msg = "Unable to set index on newly created POLocalRearrange.";
        throw new PlanException(msg, errCode, PigException.BUG, e);
      }              
     
      groups = joinPlans.get(l.get(1));
      lr.setPlans(groups);
      lr.setKeyType(type);           
      lr.setResultType(DataType.BAG);
     
      lr.visit(this);
      if(lr.getRequestedParallelism() > curMROp.requestedParallelism)
        curMROp.requestedParallelism = lr.getRequestedParallelism();
      rearrangeOutputs[1] = curMROp;                    
      compiledInputs = rearrangeOutputs;
            
     
      // create POGlobalRearrange
View Full Code Here

                String msg = "Internal error. Could not compute key type of sort operator.";
                throw new PlanException(msg, errCode, PigException.BUG, ve);
            }
        }
       
        POLocalRearrange lr = new POLocalRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)));
        try {
            lr.setIndex(0);
        } catch (ExecException e) {
          int errCode = 2058;
          String msg = "Unable to set index on newly created POLocalRearrange.";
            throw new PlanException(msg, errCode, PigException.BUG, e);
        }
        lr.setKeyType((fields == null || fields.length>1) ? DataType.TUPLE :
            keyType);
        lr.setPlans(eps1);
        lr.setResultType(DataType.TUPLE);
        lr.setAlias(sort.getAlias());
        mro.mapPlan.addAsLeaf(lr);
       
        mro.setMapDone(true);
       
        if (limit!=-1) {
          POPackageLite pkg_c = new POPackageLite(new OperatorKey(scope,nig.getNextNodeId(scope)));
          pkg_c.setKeyType((fields.length>1) ? DataType.TUPLE : keyType);
            pkg_c.setNumInps(1);
            //pkg.setResultType(DataType.TUPLE);           
            mro.combinePlan.add(pkg_c);
         
            List<PhysicalPlan> eps_c1 = new ArrayList<PhysicalPlan>();
            List<Boolean> flat_c1 = new ArrayList<Boolean>();
            PhysicalPlan ep_c1 = new PhysicalPlan();
            POProject prj_c1 = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
            prj_c1.setColumn(1);
            prj_c1.setOverloaded(false);
            prj_c1.setResultType(DataType.BAG);
            ep_c1.add(prj_c1);
            eps_c1.add(ep_c1);
            flat_c1.add(true);
            POForEach fe_c1 = new POForEach(new OperatorKey(scope,nig.getNextNodeId(scope)),
                -1, eps_c1, flat_c1);
            fe_c1.setResultType(DataType.TUPLE);
            mro.combinePlan.addAsLeaf(fe_c1);
           
            POLimit pLimit = new POLimit(new OperatorKey(scope,nig.getNextNodeId(scope)));
          pLimit.setLimit(limit);
          mro.combinePlan.addAsLeaf(pLimit);
           
            List<PhysicalPlan> eps_c2 = new ArrayList<PhysicalPlan>();
            eps_c2.addAll(sort.getSortPlans());
       
          POLocalRearrange lr_c2 = new POLocalRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)));
          try {
                lr_c2.setIndex(0);
            } catch (ExecException e) {
              int errCode = 2058;
              String msg = "Unable to set index on newly created POLocalRearrange.";             
                throw new PlanException(msg, errCode, PigException.BUG, e);
            }
          lr_c2.setKeyType((fields.length>1) ? DataType.TUPLE : keyType);
          lr_c2.setPlans(eps_c2);
          lr_c2.setResultType(DataType.TUPLE);
          mro.combinePlan.addAsLeaf(lr_c2);
        }
       
        POPackageLite pkg = new POPackageLite(new OperatorKey(scope,nig.getNextNodeId(scope)));
        pkg.setKeyType((fields == null || fields.length>1) ? DataType.TUPLE :
View Full Code Here

        ep1.add(ce);
       
        List<PhysicalPlan> eps = new ArrayList<PhysicalPlan>();
        eps.add(ep1);
       
        POLocalRearrange lr = new POLocalRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)));
        try {
            lr.setIndex(0);
        } catch (ExecException e) {
          int errCode = 2058;
          String msg = "Unable to set index on newly created POLocalRearrange.";
            throw new PlanException(msg, errCode, PigException.BUG, e);
        }
        lr.setKeyType(DataType.CHARARRAY);
        lr.setPlans(eps);
        lr.setResultType(DataType.TUPLE);
        lr.setAlias(sort.getAlias());
        mro.mapPlan.add(lr);
        mro.mapPlan.connect(nfe1, lr);
       
        mro.setMapDone(true);
       
View Full Code Here

        }
        PhysicalOperator mapLeaf = mapLeaves.get(0);
        if (!(mapLeaf instanceof POLocalRearrange)) {
            return;
        }
        POLocalRearrange rearrange = (POLocalRearrange)mapLeaf;

        List<PhysicalOperator> reduceRoots = mr.reducePlan.getRoots();
        if (reduceRoots.size() != 1) {
            messageCollector.collect("Expected reduce to have single leaf", MessageType.Warning, PigWarning.MULTI_LEAF_REDUCE);
            return;
        }

        // I expect that the first root should always be a POPackage.  If
        // not, I don't know what's going on, so I'm out of here.
        PhysicalOperator root = reduceRoots.get(0);
        if (!(root instanceof POPackage)) {
            messageCollector.collect("Expected reduce root to be a POPackage", MessageType.Warning, PigWarning.NON_PACKAGE_REDUCE_PLAN_ROOT);
            return;
        }
        POPackage pack = (POPackage)root;

        List<PhysicalOperator> packSuccessors =
            mr.reducePlan.getSuccessors(root);
        if (packSuccessors == null || packSuccessors.size() != 1) return;
        PhysicalOperator successor = packSuccessors.get(0);

        if (successor instanceof POLimit) {
            //POLimit is acceptable, as long has it has a single foreach
            // as successor
            List<PhysicalOperator> limitSucs =
                mr.reducePlan.getSuccessors(successor);
            if(limitSucs != null && limitSucs.size() == 1 &&
                    limitSucs.get(0) instanceof POForEach) {
                // the code below will now further examine
                // the foreach
                successor = limitSucs.get(0);
            }

        }
        if (successor instanceof POForEach) {
            POForEach foreach = (POForEach)successor;
            List<PhysicalPlan> feInners = foreach.getInputPlans();

            // find algebraic operators and also check if the foreach statement
            // is suitable for combiner use
            List<Pair<PhysicalOperator, PhysicalPlan>> algebraicOps =
                findAlgebraicOps(feInners);
            if(algebraicOps == null || algebraicOps.size() == 0){
                // the plan is not  combinable or there is nothing to combine
                //we're done
                return;
            }
            if (mr.combinePlan.getRoots().size() != 0) {
                messageCollector.collect("Wasn't expecting to find anything already "
                        + "in the combiner!", MessageType.Warning, PigWarning.NON_EMPTY_COMBINE_PLAN);
                return;
            }

            log.info("Choosing to move algebraic foreach to combiner");

            try {


                // replace PODistinct->Project[*] with distinct udf (which is Algebriac)
                for(Pair<PhysicalOperator, PhysicalPlan> op2plan : algebraicOps ){
                    if(! (op2plan.first instanceof PODistinct))
                        continue;
                    DistinctPatcher distinctPatcher = new DistinctPatcher(op2plan.second);
                    distinctPatcher.visit();
                    if(distinctPatcher.getDistinct() == null){
                        int errCode = 2073;
                        String msg = "Problem with replacing distinct operator with distinct built-in function.";
                        throw new PlanException(msg, errCode, PigException.BUG);
                    }
                    op2plan.first = distinctPatcher.getDistinct();
                }

                //create new map foreach
                POForEach mfe = createForEachWithGrpProj(foreach, rearrange.getKeyType());               
                Map<PhysicalOperator, Integer> op2newpos =
                    new HashMap<PhysicalOperator, Integer>();
                Integer pos = 1;
                //create plan for each algebraic udf and add as inner plan in map-foreach
                for(Pair<PhysicalOperator, PhysicalPlan> op2plan : algebraicOps ){
                    PhysicalPlan udfPlan = createPlanWithPredecessors(op2plan.first, op2plan.second);
                    mfe.addInputPlan(udfPlan, false);
                    op2newpos.put(op2plan.first, pos++);
                }
                changeFunc(mfe, POUserFunc.INITIAL);

                // since we will only be creating SingleTupleBag as input to
                // the map foreach, we should flag the POProjects in the map
                // foreach inner plans to also use SingleTupleBag
                for (PhysicalPlan mpl : mfe.getInputPlans()) {
                    try {
                        new fixMapProjects(mpl).visit();
                    } catch (VisitorException e) {
                        int errCode = 2089;
                        String msg = "Unable to flag project operator to use single tuple bag.";
                        throw new PlanException(msg, errCode, PigException.BUG, e);
                    }
                }

                //create new combine foreach
                POForEach cfe = createForEachWithGrpProj(foreach, rearrange.getKeyType());
                //add algebraic functions with appropriate projection
                addAlgebraicFuncToCombineFE(cfe, op2newpos);
                changeFunc(cfe, POUserFunc.INTERMEDIATE);

                //fix projection and function time for algebraic functions in reduce foreach
                for(Pair<PhysicalOperator, PhysicalPlan> op2plan : algebraicOps ){
                    setProjectInput(op2plan.first, op2plan.second, op2newpos.get(op2plan.first));
                    ((POUserFunc)op2plan.first).setAlgebraicFunction(POUserFunc.FINAL);
                }


                // we have modified the foreach inner plans - so set them
                // again for the foreach so that foreach can do any re-initialization
                // around them.
                // FIXME - this is a necessary evil right now because the leaves are explicitly
                // stored in the POForeach as a list rather than computed each time at
                // run time from the plans for optimization. Do we want to have the Foreach
                // compute the leaves each time and have Java optimize it (will Java optimize?)?
                mfe.setInputPlans(mfe.getInputPlans());
                cfe.setInputPlans(cfe.getInputPlans());
                foreach.setInputPlans(foreach.getInputPlans());

                //tell POCombinerPackage which fields need projected and
                // which placed in bags. First field is simple project
                // rest need to go into bags
                int numFields = algebraicOps.size() + 1; // algebraic funcs + group key
                boolean[] bags = new boolean[numFields];
                bags[0] = false;
                for (int i = 1; i < numFields; i++) {
                    bags[i] = true;
                }

                // Use the POCombiner package in the combine plan
                // as it needs to act differently than the regular
                // package operator.
                mr.combinePlan = new PhysicalPlan();
                POCombinerPackage combinePack =
                    new POCombinerPackage(pack, bags);
                mr.combinePlan.add(combinePack);
                mr.combinePlan.add(cfe);
                mr.combinePlan.connect(combinePack, cfe);
                // No need to connect projections in cfe to cp, because
                // PigCombiner directly attaches output from package to
                // root of remaining plan.

                POLocalRearrange mlr = getNewRearrange(rearrange);


                // A specialized local rearrange operator will replace
                // the normal local rearrange in the map plan. This behaves
                // like the regular local rearrange in the getNext()
                // as far as getting its input and constructing the
                // "key" out of the input. It then returns a tuple with
                // two fields - the key in the first position and the
                // "value" inside a bag in the second position. This output
                // format resembles the format out of a Package. This output
                // will feed to the map foreach which expects this format.
                // If the key field isn't in the project of the combiner or map foreach,
                // it is added to the end (This is required so that we can
                // set up the inner plan of the new Local Rearrange leaf in the map
                // and combine plan to contain just the project of the key).
                patchUpMap(mr.mapPlan, getPreCombinerLR(rearrange), mfe, mlr);
                POLocalRearrange clr = getNewRearrange(rearrange);

                mr.combinePlan.add(clr);
                mr.combinePlan.connect(cfe, clr);

                // Change the package operator in the reduce plan to
View Full Code Here

     * @throws PlanException
     */
    private void patchUpMap(PhysicalPlan mapPlan, POPreCombinerLocalRearrange preCombinerLR,
            POForEach mfe, POLocalRearrange mlr) throws PlanException {

        POLocalRearrange oldLR = (POLocalRearrange)mapPlan.getLeaves().get(0);
        mapPlan.replace(oldLR, preCombinerLR);

        mapPlan.add(mfe);
        mapPlan.connect(preCombinerLR, mfe);

View Full Code Here

TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLocalRearrange

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.