Package org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators

Examples of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POForEach


     * @param keyType type for group-by key
     * @return new POForeach
     */
    private static POForEach createForEachWithGrpProj(POForEach foreach, byte keyType) {
        String scope = foreach.getOperatorKey().scope;
        POForEach newFE = new POForEach(createOperatorKey(scope), new ArrayList<PhysicalPlan>());
        newFE.addOriginalLocation(foreach.getAlias(), foreach.getOriginalLocations());
        newFE.setResultType(foreach.getResultType());
        //create plan that projects the group column
        PhysicalPlan grpProjPlan = new PhysicalPlan();
        //group by column is the first column
        POProject proj = new POProject(createOperatorKey(scope), 1, 0);
        proj.setResultType(keyType);
        grpProjPlan.add(proj);
        newFE.addInputPlan(grpProjPlan, false);
        return newFE;
    }
View Full Code Here


        }

        // visit the POForEach of the reduce plan. We can have Limit and Filter
        // in the middle
        PhysicalOperator currentNode = root;
        POForEach foreach = null;
        while (currentNode != null) {
            if (currentNode instanceof POPackage
                    && !(((POPackage) currentNode).getPkgr() instanceof JoinPackager)
                    || currentNode instanceof POFilter
                    || currentNode instanceof POLimit) {
                List<PhysicalOperator> succs = reducePlan
                        .getSuccessors(currentNode);
                if (succs == null) // We didn't find POForEach
                    return null;
                if (succs.size() != 1) {
                    log.debug("See multiple output for " + currentNode
                            + " in reduce plan, skip secondary key optimizing");
                    return null;
                }
                currentNode = succs.get(0);
            } else if (currentNode instanceof POForEach) {
                foreach = (POForEach) currentNode;
                break;
            } else { // Skip optimization
                return null;
            }
        }

        // We do not find a foreach (we shall not come here, a trick to fool findbugs)
        if (foreach==null)
            return null;

        sortsToRemove = new ArrayList<POToChange>();
        distinctsToChange = new ArrayList<POToChange>();

        for (PhysicalPlan innerPlan : foreach.getInputPlans()) {
            // visit inner plans to figure out the sort order for distinct /
            // sort
            SecondaryKeyDiscover innerPlanDiscover = new SecondaryKeyDiscover(
                    innerPlan, sortKeyInfos, secondarySortKeyInfo);
            try {
View Full Code Here

                feproj2.setStar(false);
                feproj2.setOverloaded(false);
                fep2.add(feproj2);
                List<PhysicalPlan> fePlans = Arrays.asList(fep1, fep2);

                POForEach poForEach = new POForEach(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1, fePlans, flattenLst);

                List<LogicalExpressionPlan> rankPlans = loRank.getRankColPlans();
                byte[] newTypes = new byte[rankPlans.size()];

                for(int i = 0; i < rankPlans.size(); i++) {
                    LogicalExpressionPlan loep = rankPlans.get(i);
                    Iterator<Operator> inpOpers = loep.getOperators();

                    while(inpOpers.hasNext()) {
                        Operator oper = inpOpers.next();
                        newTypes[i] = ((ProjectExpression) oper).getType();
                    }
                }

                List<PhysicalPlan> newPhysicalPlan = new ArrayList<PhysicalPlan>();
                List<Boolean> newOrderPlan = new ArrayList<Boolean>();

                for(int i = 0; i < loRank.getRankColPlans().size(); i++) {
                    PhysicalPlan fep3 = new PhysicalPlan();
                    POProject feproj3 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1);
                    feproj3.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                    feproj3.setColumn(i);
                    feproj3.setResultType(newTypes[i]);
                    feproj3.setStar(false);
                    feproj3.setOverloaded(false);
                    fep3.add(feproj3);

                    newPhysicalPlan.add(fep3);
                    newOrderPlan.add(loRank.getAscendingCol().get(i));
                }

                POSort poSort;
                poSort = new POSort(new OperatorKey(scope, nodeGen
                        .getNextNodeId(scope)), -1, null,
                        newPhysicalPlan, newOrderPlan, null);
                poSort.addOriginalLocation(loRank.getAlias(), loRank.getLocation());


                poCounter = new POCounter(
                        new OperatorKey(scope, nodeGen
                                .getNextNodeId(scope)), -1 , null,
                                newPhysicalPlan, newOrderPlan);

                poCounter.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                poCounter.setResultType(DataType.TUPLE);
                poCounter.setIsRowNumber(loRank.isRowNumber());
                poCounter.setIsDenseRank(loRank.isDenseRank());
                poCounter.setOperationID(String.valueOf(operationID));

                poRank = new PORank(
                        new OperatorKey(scope, nodeGen
                                .getNextNodeId(scope)), -1 , null,
                                newPhysicalPlan, newOrderPlan);

                poRank.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                poRank.setResultType(DataType.TUPLE);
                poRank.setOperationID(String.valueOf(operationID));

                List<Boolean> flattenLst2 = Arrays.asList(false, true);

                PhysicalPlan fep12 = new PhysicalPlan();
                POProject feproj12 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1);
                feproj12.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                feproj12.setColumn(0);
                feproj12.setResultType(DataType.LONG);
                feproj12.setStar(false);
                feproj12.setOverloaded(false);
                fep12.add(feproj12);


                PhysicalPlan fep22 = new PhysicalPlan();
                POProject feproj22 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1);
                feproj22.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                feproj22.setColumn(loRank.getRankColPlans().size()+1);
                feproj22.setResultType(DataType.BAG);
                feproj22.setStar(false);
                feproj22.setOverloaded(false);
                fep22.add(feproj22);
                List<PhysicalPlan> fePlans2 = Arrays.asList(fep12, fep22);

                POForEach poForEach2 = new POForEach(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1, fePlans2, flattenLst2);

                currentPlan.add(poForEach);
                currentPlan.add(poSort);
                currentPlan.add(poCounter);
                currentPlan.add(poRank);
View Full Code Here

                    feproj.setStar(true);
                    feproj.setOverloaded(false);
                    fep2.add(feproj);
                    List<PhysicalPlan> fePlans = Arrays.asList(fep1, fep2);

                    POForEach fe = new POForEach(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), cross.getRequestedParallelism(), fePlans, flattenLst );
                    fe.setMapSideOnly(true);
                    fe.addOriginalLocation(cross.getAlias(), cross.getLocation());
                    currentPlan.add(fe);
                    currentPlan.connect(logToPhyMap.get(op), fe);

                    POLocalRearrange physOp = new POLocalRearrange(new OperatorKey(
                            scope, nodeGen.getNextNodeId(scope)), cross
                            .getRequestedParallelism());
                    physOp.addOriginalLocation(cross.getAlias(), cross.getLocation());
                    List<PhysicalPlan> lrPlans = new ArrayList<PhysicalPlan>();
                    for(int i=0;i<inputs.size();i++){
                        PhysicalPlan lrp1 = new PhysicalPlan();
                        POProject lrproj1 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), cross.getRequestedParallelism(), i);
                        lrproj1.addOriginalLocation(cross.getAlias(), cross.getLocation());
                        lrproj1.setOverloaded(false);
                        lrproj1.setResultType(DataType.INTEGER);
                        lrp1.add(lrproj1);
                        lrPlans.add(lrp1);
                    }

                    physOp.setCross(true);
                    physOp.setIndex(count++);
                    physOp.setKeyType(DataType.TUPLE);
                    physOp.setPlans(lrPlans);
                    physOp.setResultType(DataType.TUPLE);

                    currentPlan.add(physOp);
                    currentPlan.connect(fe, physOp);
                    currentPlan.connect(physOp, poGlobal);
                }
            } catch (PlanException e1) {
                int errCode = 2015;
                String msg = "Invalid physical operators in the physical plan" ;
                throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e1);
            } catch (ExecException e) {
                int errCode = 2058;
                String msg = "Unable to set index on newly create POLocalRearrange.";
                throw new VisitorException(msg, errCode, PigException.BUG, e);
            }

            poPackage.getPkgr().setKeyType(DataType.TUPLE);
            poPackage.setResultType(DataType.TUPLE);
            poPackage.setNumInps(count);
            boolean inner[] = new boolean[count];
            for (int i=0;i<count;i++) {
                inner[i] = true;
            }
            poPackage.getPkgr().setInner(inner);

            List<PhysicalPlan> fePlans = new ArrayList<PhysicalPlan>();
            List<Boolean> flattenLst = new ArrayList<Boolean>();
            for(int i=1;i<=count;i++){
                PhysicalPlan fep1 = new PhysicalPlan();
                POProject feproj1 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), cross.getRequestedParallelism(), i);
                feproj1.addOriginalLocation(cross.getAlias(), cross.getLocation());
                feproj1.setResultType(DataType.BAG);
                feproj1.setOverloaded(false);
                fep1.add(feproj1);
                fePlans.add(fep1);
                flattenLst.add(true);
            }

            POForEach fe = new POForEach(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), cross.getRequestedParallelism(), fePlans, flattenLst );
            fe.addOriginalLocation(cross.getAlias(), cross.getLocation());
            currentPlan.add(fe);
            try{
                currentPlan.connect(poPackage, fe);
            }catch (PlanException e1) {
                int errCode = 2015;
View Full Code Here

            }
        }
        if (schema != null) {
            SchemaTupleFrontend.registerToGenerateIfPossible(schema, false, GenContext.FOREACH); //TODO may need to be appendable
        }
        POForEach poFE = new POForEach(new OperatorKey(scope, nodeGen
                .getNextNodeId(scope)), foreach.getRequestedParallelism(), innerPlans, flattenList, schema);
        poFE.addOriginalLocation(foreach.getAlias(), foreach.getLocation());
        poFE.setResultType(DataType.BAG);
        logToPhyMap.put(foreach, poFE);
        currentPlan.add(poFE);

        // generate cannot have multiple inputs
        List<Operator> op = foreach.getPlan().getPredecessors(foreach);
View Full Code Here

            }

            if(!usePOMergeJoin){
                // Now create and configure foreach which will flatten the output
                // of cogroup.
                POForEach fe = compileFE4Flattening(innerFlags,  scope, parallel, alias, location, inputs);
                currentPlan.add(fe);
                try {
                    currentPlan.connect(smj, fe);
                } catch (PlanException e) {
                    throw new LogicalToPhysicalTranslatorException(e.getMessage(),e.getErrorCode(),e.getErrorSource(),e);
                }
                logToPhyMap.put(loj, fe);
            }

            return;
        }
        else if (loj.getJoinType() == LOJoin.JOINTYPE.HASH){
            POPackage poPackage = compileToLR_GR_PackTrio(loj, loj.getCustomPartitioner(), innerFlags, loj.getExpressionPlans());
            POForEach fe = compileFE4Flattening(innerFlags,  scope, parallel, alias, location, inputs);
            currentPlan.add(fe);
            try {
                currentPlan.connect(poPackage, fe);
            } catch (PlanException e) {
                throw new LogicalToPhysicalTranslatorException(e.getDetailedMessage(),
View Full Code Here

            int parallel, String alias, SourceLocation location, List<Operator> inputs)
                throws FrontendException {

        List<PhysicalPlan> fePlans = new ArrayList<PhysicalPlan>();
        List<Boolean> flattenLst = new ArrayList<Boolean>();
        POForEach fe;
        try{
            for(int i=0;i< inputs.size();i++){
                PhysicalPlan fep1 = new PhysicalPlan();
                POProject feproj1 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)),
                        parallel, i+1); //i+1 since the first column is the "group" field
                feproj1.addOriginalLocation(alias, location);
                feproj1.setResultType(DataType.BAG);
                feproj1.setOverloaded(false);
                fep1.add(feproj1);
                fePlans.add(fep1);
                // the parser would have marked the side
                // where we need to keep empty bags on
                // non matched as outer (innerFlags[i] would be
                // false)
                if(!(innerFlags[i])) {
                    Operator joinInput = inputs.get(i);
                    // for outer join add a bincond
                    // which will project nulls when bag is
                    // empty
                    updateWithEmptyBagCheck(fep1, joinInput);
                }
                flattenLst.add(true);
            }

            fe = new POForEach(new OperatorKey(scope, nodeGen.getNextNodeId(scope)),
                    parallel, fePlans, flattenLst );
            fe.addOriginalLocation(alias, location);

        }catch (PlanException e1) {
            int errCode = 2015;
            String msg = "Invalid physical operators in the physical plan" ;
            throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e1);
View Full Code Here

            prj1.setColumn(0);
            prj1.setOverloaded(false);
            ep1.add(prj1);
            eps1.add(ep1);
            flat1.add(true);
            POForEach nfe1 = new POForEach(new OperatorKey(scope, nig
                    .getNextNodeId(scope)), op.getRequestedParallelism(), eps1,
                    flat1);
            nfe1.setResultType(DataType.BAG);
            curMROp.reducePlan.addAsLeaf(nfe1);
            curMROp.setNeedsDistinctCombiner(true);
            phyToMROpMap.put(op, curMROp);
            curMROp.phyToMRMap.put(op, nfe1);
        }catch(Exception e){
View Full Code Here

                    CompilerUtils.addEmptyBagOuterJoin(ep, op.getSchema(i));
                }
                flat.add(true);
            }

            POForEach fe = new POForEach(new OperatorKey(scope,nig.getNextNodeId(scope)), -1, eps, flat);
            fe.setResultType(DataType.TUPLE);

            fe.visit(this);

            curMROp.setSkewedJoinPartitionFile(partitionFile.getFileName());
            phyToMROpMap.put(op, curMROp);
        }catch(PlanException e) {
            int errCode = 2034;
View Full Code Here

            prj_c1.setOverloaded(false);
            prj_c1.setResultType(DataType.BAG);
            ep_c1.add(prj_c1);
            eps_c1.add(ep_c1);
            flat_c1.add(true);
            POForEach fe_c1 = new POForEach(new OperatorKey(scope,nig.getNextNodeId(scope)),
                    -1, eps_c1, flat_c1);
            fe_c1.setResultType(DataType.TUPLE);
            mro.combinePlan.addAsLeaf(fe_c1);

            POLimit pLimit = new POLimit(new OperatorKey(scope,nig.getNextNodeId(scope)));
            pLimit.setLimit(limit);
            mro.combinePlan.addAsLeaf(pLimit);

            List<PhysicalPlan> eps_c2 = new ArrayList<PhysicalPlan>();
            eps_c2.addAll(sort.getSortPlans());

            POLocalRearrange lr_c2 = new POLocalRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)));
            try {
                lr_c2.setIndex(0);
            } catch (ExecException e) {
                int errCode = 2058;
                String msg = "Unable to set index on newly created POLocalRearrange.";
                throw new PlanException(msg, errCode, PigException.BUG, e);
            }
            lr_c2.setKeyType((fields.length>1) ? DataType.TUPLE : keyType);
            lr_c2.setPlans(eps_c2);
            lr_c2.setResultType(DataType.TUPLE);
            mro.combinePlan.addAsLeaf(lr_c2);
        }

        POPackage pkg = new POPackage(new OperatorKey(scope,
                nig.getNextNodeId(scope)));
        LitePackager pkgr = new LitePackager();
        pkgr.setKeyType((fields == null || fields.length > 1) ? DataType.TUPLE : keyType);
        pkg.setPkgr(pkgr);
        pkg.setNumInps(1);
        mro.reducePlan.add(pkg);

        PhysicalPlan ep = new PhysicalPlan();
        POProject prj = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
        prj.setColumn(1);
        prj.setOverloaded(false);
        prj.setResultType(DataType.BAG);
        ep.add(prj);
        List<PhysicalPlan> eps2 = new ArrayList<PhysicalPlan>();
        eps2.add(ep);
        List<Boolean> flattened = new ArrayList<Boolean>();
        flattened.add(true);
        POForEach nfe1 = new POForEach(new OperatorKey(scope,nig.getNextNodeId(scope)),-1,eps2,flattened);
        mro.reducePlan.add(nfe1);
        mro.reducePlan.connect(pkg, nfe1);
        mro.phyToMRMap.put(sort, nfe1);
        if (limit!=-1)
        {
View Full Code Here

TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POForEach

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.