Package org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators

Examples of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLocalRearrange


            } else if (mapLeaf instanceof POUnion) {
                List<PhysicalOperator> preds = mr.mapPlan
                        .getPredecessors(mapLeaf);
                boolean found = false;
                for (PhysicalOperator pred : preds) {
                    POLocalRearrange rearrange = (POLocalRearrange) pred;
                    rearrange.setUseSecondaryKey(true);
                    if (rearrange.getIndex() == indexOfRearrangeToChange) {
                        // Try to find the POLocalRearrange for the secondary key
                        found = true;
                        setSecondaryPlan(mr.mapPlan, rearrange, secondarySortKeyInfo);
                    }
                }
View Full Code Here


       
        List<PhysicalPlan> pls = splitOp.getPlans();
        for (PhysicalPlan pl : pls) {
            PhysicalOperator leaf = pl.getLeaves().get(0);
            if (leaf instanceof POLocalRearrange) {
                POLocalRearrange lr = (POLocalRearrange)leaf;
                try {
                    lr.setMultiQueryIndex(index++);
                } catch (ExecException e) {                   
                    int errCode = 2136;
                    String msg = "Internal Error. Unable to set multi-query index for optimization.";
                    throw new OptimizerException(msg, errCode, PigException.BUG, e);                  
                }
               
                // change the map key type to tuple when
                // multiple splittees have different map key types
                if (!sameKeyType) {
                    lr.setKeyType(DataType.TUPLE);
                }
            } else if (leaf instanceof POSplit) {
                POSplit spl = (POSplit)leaf;
                index = setIndexOnLRInSplit(index, spl, sameKeyType);
            }
View Full Code Here

               
        int curIndex = index;
       
        PhysicalOperator leaf = pl.getLeaves().get(0);
        if (leaf instanceof POLocalRearrange) {
            POLocalRearrange lr = (POLocalRearrange)leaf;
            try {
                lr.setMultiQueryIndex(curIndex++)
            } catch (ExecException e) {                                     
                int errCode = 2136;
                String msg = "Internal Error. Unable to set multi-query index for optimization.";
                throw new OptimizerException(msg, errCode, PigException.BUG, e);
            }
           
            // change the map key type to tuple when
            // multiple splittees have different map key types
            if (!sameKeyType) {
                lr.setKeyType(DataType.TUPLE);
            }
        } else if (leaf instanceof POSplit) {
            // if the map plan that we are trying to merge
            // has a split, we need to update the indices of
            // the POLocalRearrange operators in the inner plans
View Full Code Here

        PhysicalOperator leaf = from.getLeaves().get(0);
        if (leaf instanceof PODemux) {
            List<PhysicalPlan> pls = ((PODemux)leaf).getPlans();
            for (PhysicalPlan pl : pls) {
                demux.addPlan(pl, mapKeyType, keyPos);
                POLocalRearrange lr = (POLocalRearrange)pl.getLeaves().get(0);
                try {
                    lr.setMultiQueryIndex(initial + plCount++);           
                } catch (ExecException e) {                                       
                    int errCode = 2136;
                    String msg = "Internal Error. Unable to set multi-query index for optimization.";
                    throw new OptimizerException(msg, errCode, PigException.BUG, e);
                }
               
                // change the map key type to tuple when
                // multiple splittees have different map key types
                if (!isSameKeyType) {
                    lr.setKeyType(DataType.TUPLE);
                }
            }
        } else {
            demux.addPlan(from, mapKeyType, keyPos);
            POLocalRearrange lr = (POLocalRearrange)from.getLeaves().get(0);
            try {
                lr.setMultiQueryIndex(initial + plCount++);           
            } catch (ExecException e) {                                       
                int errCode = 2136;
                String msg = "Internal Error. Unable to set multi-query index for optimization.";
                throw new OptimizerException(msg, errCode, PigException.BUG, e);
            }
               
            // change the map key type to tuple when
            // multiple splittees have different map key types
            if (!isSameKeyType) {
                lr.setKeyType(DataType.TUPLE);
            }
        }
       
        if (plCount != total) {
            int errCode = 2146;
View Full Code Here

        }
        PhysicalOperator mapLeaf = mapLeaves.get(0);
        if (!(mapLeaf instanceof POLocalRearrange)) {
            return;
        }
        POLocalRearrange rearrange = (POLocalRearrange)mapLeaf;

        List<PhysicalOperator> reduceRoots = mr.reducePlan.getRoots();
        if (reduceRoots.size() != 1) {
          messageCollector.collect("Expected reduce to have single leaf", MessageType.Warning, PigWarning.MULTI_LEAF_REDUCE);
            return;
        }

        // I expect that the first root should always be a POPackage.  If
        // not, I don't know what's going on, so I'm out of here.
        PhysicalOperator root = reduceRoots.get(0);
        if (!(root instanceof POPackage)) {
          messageCollector.collect("Expected reduce root to be a POPackage", MessageType.Warning, PigWarning.NON_PACKAGE_REDUCE_PLAN_ROOT);
            return;
        }
        POPackage pack = (POPackage)root;

        List<PhysicalOperator> packSuccessors =
            mr.reducePlan.getSuccessors(root);
        if (packSuccessors == null || packSuccessors.size() != 1) return;
        PhysicalOperator successor = packSuccessors.get(0);

        // Need to check if this is a distinct.
        if (successor instanceof POFilter) {
            /*
               Later
            POFilter filter = (POFilter)successor;
            PhysicalPlan filterInner = filter.getPlan();
            if (onKeysOnly(filterInner)) {
                // TODO move filter to combiner
                // TODO Patch up projects of filter successor
                // Call ourselves again, as we may be able to move the next
                // operator too.
                visitMROp(mr);
            } else if (algebraic(filterInner)) {
                // TODO Duplicate filter to combiner
            }
            */
        } else if (successor instanceof POForEach) {
            POForEach foreach = (POForEach)successor;
            List<PhysicalPlan> feInners = foreach.getInputPlans();
            List<ExprType> ap = algebraic(feInners, foreach.getToBeFlattened());
            if (ap != null) {
                log.info("Choosing to move algebraic foreach to combiner");

                // Need to insert two new foreachs - one  in the combine
        // and one in the map plan which will be based on the reduce foreach.
        // The map foreach will have one inner plan for each
        // inner plan in the foreach we're duplicating.  For
        // projections, the plan will be the same.  For algebraic
        // udfs, the plan will have the initial version of the function.
       
        // The combine foreach will have one inner plan for each
        // inner plan in the foreach we're duplicating.  For
        // projections, the project operators will be changed to
        // project the same column as its position in the
        // foreach. For algebraic udfs, the plan will have the
        // intermediate version of the function. The input to the
        // udf will be a POProject which will project the column
        // corresponding to the position of the udf in the foreach
       
          // In the inner plans of the reduce foreach for  
        // projections, the project operators will be changed to
        // project the same column as its position in the
        // foreach. For algebraic udfs, the plan will have the
        // final version of the function. The input to the
        // udf will be a POProject which will project the column
        // corresponding to the position of the udf in the foreach
                if (mr.combinePlan.getRoots().size() != 0) {
                  messageCollector.collect("Wasn't expecting to find anything already "
                        + "in the combiner!", MessageType.Warning, PigWarning.NON_EMPTY_COMBINE_PLAN);
                    return;
                }
                mr.combinePlan = new PhysicalPlan();
                try {
                    // If we haven't already found the key (and thus the
                    // key type) we need to figure out the key type now.
                    if (mKeyType == 0) {
                        mKeyType = rearrange.getKeyType();
                    }

                    POForEach mfe = foreach.clone();
                    POForEach cfe = foreach.clone();
                    fixUpForeachs(mfe, cfe, foreach, ap);
                   
                   
                    // Use the ExprType list returned from algebraic to tell
                    // POCombinerPackage which fields need projected and
                    // which placed in bags.
                    int numFields = (mKeyField >= ap.size()) ? mKeyField + 1 :
                        ap.size();
                    boolean[] bags = new boolean[numFields];
                    for (int i = 0; i < ap.size(); i++) {
                        if (ap.get(i) == ExprType.SIMPLE_PROJECT) bags[i] = false;
                        else bags[i] = true;
                    }
                    bags[mKeyField] = false;
          // Use the POCombiner package in the combine plan
          // as it needs to act differently than the regular
          // package operator.
                    POCombinerPackage combinePack =
                        new POCombinerPackage(pack, bags, keyFieldPositions);
                    mr.combinePlan.add(combinePack);
                    mr.combinePlan.add(cfe);
                    mr.combinePlan.connect(combinePack, cfe);
                    // No need to connect projections in cfe to cp, because
                    // PigCombiner directly attaches output from package to
                    // root of remaining plan.
                   
                    POLocalRearrange mlr = rearrange.clone();
                    fixUpRearrange(mlr);

                    // A specialized local rearrange operator will replace
                    // the normal local rearrange in the map plan. This behaves
                    // like the regular local rearrange in the getNext()
                    // as far as getting its input and constructing the
                    // "key" out of the input. It then returns a tuple with
                    // two fields - the key in the first position and the
                    // "value" inside a bag in the second position. This output
                    // format resembles the format out of a Package. This output
                    // will feed to the map foreach which expects this format.
                    // If the key field isn't in the project of the combiner or map foreach,
                    // it is added to the end (This is required so that we can
                    // set up the inner plan of the new Local Rearrange leaf in the map
                    // and combine plan to contain just the project of the key).
                    patchUpMap(mr.mapPlan, getPreCombinerLR(rearrange), mfe, mlr);
                    POLocalRearrange clr = rearrange.clone();
                    fixUpRearrange(clr);

                    mr.combinePlan.add(clr);
                    mr.combinePlan.connect(cfe, clr);
                   
View Full Code Here

     * @throws PlanException
     */
    private void patchUpMap(PhysicalPlan mapPlan, POPreCombinerLocalRearrange preCombinerLR,
            POForEach mfe, POLocalRearrange mlr) throws PlanException {
       
        POLocalRearrange oldLR = (POLocalRearrange)mapPlan.getLeaves().get(0);
        mapPlan.replace(oldLR, preCombinerLR);
       
        mapPlan.add(mfe);
        mapPlan.connect(preCombinerLR, mfe);
       
View Full Code Here

    public MergeJoinIndexer(String funcSpec, String innerPlan, String serializedPhyPlan) throws ExecException{
       
        loader = (SamplableLoader)PigContext.instantiateFuncFromSpec(funcSpec);
        try {
            List<PhysicalPlan> innerPlans = (List<PhysicalPlan>)ObjectSerializer.deserialize(innerPlan);
            lr = new POLocalRearrange(new OperatorKey("MergeJoin Indexer",NodeIdGenerator.getGenerator().getNextNodeId("MergeJoin Indexer")));
            lr.setPlans(innerPlans);
            keysCnt = innerPlans.size();
            precedingPhyPlan = (PhysicalPlan)ObjectSerializer.deserialize(serializedPhyPlan);
            if(precedingPhyPlan != null){
                    if(precedingPhyPlan.getLeaves().size() != 1 || precedingPhyPlan.getRoots().size() != 1){
View Full Code Here

            } else if (mapLeaf instanceof POUnion) {
                List<PhysicalOperator> preds = mr.mapPlan
                        .getPredecessors(mapLeaf);
                boolean found = false;
                for (PhysicalOperator pred : preds) {
                    POLocalRearrange rearrange = (POLocalRearrange) pred;
                    rearrange.setUseSecondaryKey(true);
                    if (rearrange.getIndex() == indexOfRearrangeToChange) {
                        // Try to find the POLocalRearrange for the secondary key
                        found = true;
                        setSecondaryPlan(mr.mapPlan, rearrange, secondarySortKeyInfo);
                    }
                }
View Full Code Here

        }
        PhysicalOperator mapLeaf = mapLeaves.get(0);
        if (!(mapLeaf instanceof POLocalRearrange)) {
            return;
        }
        POLocalRearrange rearrange = (POLocalRearrange)mapLeaf;

        List<PhysicalOperator> reduceRoots = reducePlan.getRoots();
        if (reduceRoots.size() != 1) {
            messageCollector.collect("Expected reduce to have single root", MessageType.Warning,
                    PigWarning.MULTI_ROOT_REDUCE);
            return;
        }

        // I expect that the first root should always be a POPackage. If not, I
        // don't know what's going on, so I'm out of here.
        PhysicalOperator root = reduceRoots.get(0);
        if (!(root instanceof POPackage)) {
            messageCollector.collect("Expected reduce root to be a POPackage", MessageType.Warning,
                    PigWarning.NON_PACKAGE_REDUCE_PLAN_ROOT);
            return;
        }
        POPackage pack = (POPackage)root;

        List<PhysicalOperator> packSuccessors = reducePlan.getSuccessors(root);
        if (packSuccessors == null || packSuccessors.size() != 1) {
            return;
        }
        PhysicalOperator successor = packSuccessors.get(0);

        if (successor instanceof POLimit) {
            // POLimit is acceptable, as long as it has a single foreach as
            // successor
            List<PhysicalOperator> limitSucs = reducePlan.getSuccessors(successor);
            if (limitSucs != null && limitSucs.size() == 1 &&
                    limitSucs.get(0) instanceof POForEach) {
                // the code below will now further examine the foreach
                successor = limitSucs.get(0);
            }
        }
        if (successor instanceof POForEach) {
            POForEach foreach = (POForEach)successor;
            List<PhysicalPlan> feInners = foreach.getInputPlans();

            // find algebraic operators and also check if the foreach statement
            // is suitable for combiner use
            List<Pair<PhysicalOperator, PhysicalPlan>> algebraicOps = findAlgebraicOps(feInners);
            if (algebraicOps == null || algebraicOps.size() == 0) {
                // the plan is not combinable or there is nothing to combine
                // we're done
                return;
            }
            if (combinePlan != null && combinePlan.getRoots().size() != 0) {
                messageCollector.collect("Wasn't expecting to find anything already " +
                        "in the combiner!", MessageType.Warning, PigWarning.NON_EMPTY_COMBINE_PLAN);
                return;
            }

            LOG.info("Choosing to move algebraic foreach to combiner");
            try {
                // replace PODistinct->Project[*] with distinct udf (which is Algebraic)
                for (Pair<PhysicalOperator, PhysicalPlan> op2plan : algebraicOps) {
                    if (! (op2plan.first instanceof PODistinct)) {
                        continue;
                    }
                    DistinctPatcher distinctPatcher = new DistinctPatcher(op2plan.second);
                    distinctPatcher.visit();
                    if (distinctPatcher.getDistinct() == null) {
                        int errCode = 2073;
                        String msg = "Problem with replacing distinct operator with distinct built-in function.";
                        throw new PlanException(msg, errCode, PigException.BUG);
                    }
                    op2plan.first = distinctPatcher.getDistinct();
                }

                // create new map foreach
                POForEach mfe = createForEachWithGrpProj(foreach, rearrange.getKeyType());
                Map<PhysicalOperator, Integer> op2newpos = Maps.newHashMap();
                Integer pos = 1;
                // create plan for each algebraic udf and add as inner plan in map-foreach
                for (Pair<PhysicalOperator, PhysicalPlan> op2plan : algebraicOps) {
                    PhysicalPlan udfPlan = createPlanWithPredecessors(op2plan.first, op2plan.second);
                    mfe.addInputPlan(udfPlan, false);
                    op2newpos.put(op2plan.first, pos++);
                }
                changeFunc(mfe, POUserFunc.INITIAL);

                // since we will only be creating SingleTupleBag as input to
                // the map foreach, we should flag the POProjects in the map
                // foreach inner plans to also use SingleTupleBag
                for (PhysicalPlan mpl : mfe.getInputPlans()) {
                    try {
                        new fixMapProjects(mpl).visit();
                    } catch (VisitorException e) {
                        int errCode = 2089;
                        String msg = "Unable to flag project operator to use single tuple bag.";
                        throw new PlanException(msg, errCode, PigException.BUG, e);
                    }
                }

                // create new combine foreach
                POForEach cfe = createForEachWithGrpProj(foreach, rearrange.getKeyType());
                // add algebraic functions with appropriate projection
                addAlgebraicFuncToCombineFE(cfe, op2newpos);
                changeFunc(cfe, POUserFunc.INTERMEDIATE);

                // fix projection and function time for algebraic functions in reduce foreach
                for (Pair<PhysicalOperator, PhysicalPlan> op2plan : algebraicOps) {
                    setProjectInput(op2plan.first, op2plan.second, op2newpos.get(op2plan.first));
                    ((POUserFunc)op2plan.first).setAlgebraicFunction(POUserFunc.FINAL);
                }

                // we have modified the foreach inner plans - so set them again
                // for the foreach so that foreach can do any re-initialization
                // around them.
                // FIXME - this is a necessary evil right now because the leaves
                // are explicitly stored in the POForeach as a list rather than
                // computed each time at run time from the plans for
                // optimization. Do we want to have the Foreach compute the
                // leaves each time and have Java optimize it (will Java
                // optimize?)?
                mfe.setInputPlans(mfe.getInputPlans());
                cfe.setInputPlans(cfe.getInputPlans());
                foreach.setInputPlans(foreach.getInputPlans());

                // tell POCombinerPackage which fields need projected and which
                // placed in bags. First field is simple project rest need to go
                // into bags
                int numFields = algebraicOps.size() + 1; // algebraic funcs + group key
                boolean[] bags = new boolean[numFields];
                bags[0] = false;
                for (int i = 1; i < numFields; i++) {
                    bags[i] = true;
                }

                // Use the POCombiner package in the combine plan
                // as it needs to act differently than the regular
                // package operator.
                CombinerPackager pkgr = new CombinerPackager(pack.getPkgr(), bags);
                POPackage combinePack = pack.clone();
                combinePack.setPkgr(pkgr);
                combinePack.setParentPlan(null);

                combinePlan.add(combinePack);
                combinePlan.add(cfe);
                combinePlan.connect(combinePack, cfe);

                // No need to connect projections in cfe to cp, because
                // PigCombiner directly attaches output from package to
                // root of remaining plan.

                POLocalRearrange mlr = getNewRearrange(rearrange);
                POPartialAgg mapAgg = null;
                if (doMapAgg) {
                    mapAgg = createPartialAgg(cfe);
                }

                // A specialized local rearrange operator will replace
                // the normal local rearrange in the map plan. This behaves
                // like the regular local rearrange in the getNext()
                // as far as getting its input and constructing the
                // "key" out of the input. It then returns a tuple with
                // two fields - the key in the first position and the
                // "value" inside a bag in the second position. This output
                // format resembles the format out of a Package. This output
                // will feed to the map foreach which expects this format.
                // If the key field isn't in the project of the combiner or map foreach,
                // it is added to the end (This is required so that we can
                // set up the inner plan of the new Local Rearrange leaf in the map
                // and combine plan to contain just the project of the key).
                patchUpMap(mapPlan, getPreCombinerLR(rearrange), mfe, mapAgg, mlr);
                POLocalRearrange clr = getNewRearrange(rearrange);
                clr.setParentPlan(null);
                combinePlan.add(clr);
                combinePlan.connect(cfe, clr);

                // Change the package operator in the reduce plan to
                // be the POCombiner package, as it needs to act
View Full Code Here

     * @param mlr
     * @throws PlanException
     */
    private static void patchUpMap(PhysicalPlan mapPlan, POPreCombinerLocalRearrange preCombinerLR,
            POForEach mfe, POPartialAgg mapAgg, POLocalRearrange mlr) throws PlanException {
        POLocalRearrange oldLR = (POLocalRearrange)mapPlan.getLeaves().get(0);
        mapPlan.replace(oldLR, preCombinerLR);

        mapPlan.add(mfe);
        mapPlan.connect(preCombinerLR, mfe);

View Full Code Here

TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLocalRearrange

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.