Package org.apache.pig.backend.hadoop.executionengine.tez.plan.operator

Examples of org.apache.pig.backend.hadoop.executionengine.tez.plan.operator.POPartitionRearrangeTez


            compiledInputs = new TezOperator[] {joinInputs[1]};

            // Run POPartitionRearrange for second join table. Note we set the
            // parallelism of POPartitionRearrange to -1, so its parallelism
            // will be determined by the size of streaming table.
            POPartitionRearrangeTez pr =
                    new POPartitionRearrangeTez(OperatorKey.genOpKey(scope));
            try {
                pr.setIndex(1);
            } catch (ExecException e) {
                int errCode = 2058;
                String msg = "Unable to set index on newly created POPartitionRearrange.";
                throw new PlanException(msg, errCode, PigException.BUG, e);
            }

            groups = joinPlans.get(l.get(1));
            pr.setPlans(groups);
            pr.setKeyType(type);
            pr.setSkewedJoin(true);
            pr.setResultType(DataType.TUPLE);
            joinJobs[1].plan.addAsLeaf(pr);
            joinJobs[1].setClosed(true);
            rearrangeOutputs[1] = joinJobs[1];

            compiledInputs = rearrangeOutputs;

            // Create POGlobalRearrange
            POGlobalRearrange gr =
                    new POGlobalRearrange(OperatorKey.genOpKey(scope), rp);
            // Skewed join has its own special partitioner
            gr.setResultType(DataType.TUPLE);
            gr.visit(this);
            joinJobs[2] = curTezOp;
            joinJobs[2].setRequestedParallelism(rp);

            compiledInputs = new TezOperator[] {joinJobs[2]};

            // Create POPakcage
            POPackage pkg = getPackage(2, type);
            pkg.setResultType(DataType.TUPLE);
            boolean [] inner = op.getInnerFlags();
            pkg.getPkgr().setInner(inner);
            pkg.visit(this);

            compiledInputs = new TezOperator[] {curTezOp};

            // Create POForEach
            List<PhysicalPlan> eps = new ArrayList<PhysicalPlan>();
            List<Boolean> flat = new ArrayList<Boolean>();

            // Add corresponding POProjects
            for (int i=0; i < 2; i++) {
                ep = new PhysicalPlan();
                POProject prj = new POProject(OperatorKey.genOpKey(scope));
                prj.setColumn(i+1);
                prj.setOverloaded(false);
                prj.setResultType(DataType.BAG);
                ep.add(prj);
                eps.add(ep);
                if (!inner[i]) {
                    // Add an empty bag for outer join
                    CompilerUtils.addEmptyBagOuterJoin(ep, op.getSchema(i));
                }
                flat.add(true);
            }

            POForEach fe =
                    new POForEach(OperatorKey.genOpKey(scope), -1, eps, flat);
            fe.setResultType(DataType.TUPLE);
            fe.visit(this);

            // Connect vertices
            lrTez.setOutputKey(joinJobs[0].getOperatorKey().toString());
            lrTezSample.setOutputKey(sampleJobPair.first.getOperatorKey().toString());
            identityInOutTez.setOutputKey(joinJobs[2].getOperatorKey().toString());
            pr.setOutputKey(joinJobs[2].getOperatorKey().toString());

            TezEdgeDescriptor edge = joinJobs[0].inEdges.get(prevOp.getOperatorKey());
            joinJobs[0].setUseMRMapSettings(prevOp.isUseMRMapSettings());
            // TODO: Convert to unsorted shuffle after TEZ-661
            // Use 1-1 edge
View Full Code Here

TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.tez.plan.operator.POPartitionRearrangeTez

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.