Examples of POValueOutputTez

org.apache.pig.backend.hadoop.executionengine.tez.plan.operator.POValueOutputTez

Examples of org.apache.pig.backend.hadoop.executionengine.tez.plan.operator.POValueOutputTez

                    userFunc.setFuncSpec(newSpec);


                    if (storeSeen.containsKey(store)) {
                        storeSeen.get(store).addOutputKey(tezOp.getOperatorKey().toString());
                    } else {
                        POValueOutputTez output = new POValueOutputTez(OperatorKey.genOpKey(scope));
                        output.addOutputKey(tezOp.getOperatorKey().toString());
                        from.plan.remove(from.plan.getOperator(store.getOperatorKey()));
                        from.plan.addAsLeaf(output);
                        storeSeen.put(store, output);


                        //Remove unused store filename

View Full Code Here

Examples of org.apache.pig.backend.hadoop.executionengine.tez.plan.operator.POValueOutputTez

                if (p instanceof POStore) {
                    PhysicalOperator store = oper.plan.getOperator(p.getOperatorKey());
                    // Replace the POStore with a POValueOutputTez and convert the tezOperator to a splitter
                    oper.plan.disconnect(oper.plan.getPredecessors(store).get(0), store);
                    oper.plan.remove(store);
                    POValueOutputTez valueOutput = new POValueOutputTez(new OperatorKey(scope,nig.getNextNodeId(scope)));
                    oper.plan.addAsLeaf(valueOutput);
                    oper.setSplitter(true);


                    // Create a splittee of store only
                    TezOperator storeOnlyTezOperator = getTezOp();

View Full Code Here

Examples of org.apache.pig.backend.hadoop.executionengine.tez.plan.operator.POValueOutputTez

                    return;
                }
            }


            // Need to add POValueOutputTez to the end of the last tezOp
            POValueOutputTez output = new POValueOutputTez(OperatorKey.genOpKey(scope));
            output.copyAliasFrom(op);
            curTezOp.plan.addAsLeaf(output);
            TezOperator prevOp = curTezOp;


            // Mark the start of a new TezOperator which will do the actual limiting with 1 task.
            blocking();


            // Explicitly set the parallelism for the new vertex to 1.
            curTezOp.setRequestedParallelism(1);
            curTezOp.setDontEstimateParallelism(true);


            output.addOutputKey(curTezOp.getOperatorKey().toString());
            // LIMIT does not make any ordering guarantees and this is unsorted shuffle.
            TezEdgeDescriptor edge = curTezOp.inEdges.get(prevOp.getOperatorKey());
            TezCompilerUtil.configureValueOnlyTupleOutput(edge, DataMovementType.SCATTER_GATHER);


            // Limit after order by with scalar expression
            if (limitAfterSort) {
                curTezOp.markLimitAfterSort();
                output.setTaskIndexWithRecordIndexAsKey(true);
                // POValueOutputTez will write key (task index, record index) in
                // sorted order. So using UnorderedKVOutput instead of OrderedPartitionedKVOutput.
                // But input needs to be merged in sorted order and requires OrderedGroupedKVInput
                edge.outputClassName = UnorderedKVOutput.class.getName();
                edge.inputClassName = OrderedGroupedKVInput.class.getName();

View Full Code Here

Examples of org.apache.pig.backend.hadoop.executionengine.tez.plan.operator.POValueOutputTez

            // it will reflect for joinJobs[0] so that 1-1 edge will work.
            joinJobs[0].setRequestedParallelismByReference(prevOp);


            TezCompilerUtil.connect(tezPlan, prevOp, sampleJobPair.first);


            POValueOutputTez sampleOut = (POValueOutputTez) sampleJobPair.first.plan.getLeaves().get(0);
            for (int i = 0; i < 2; i++) {
                joinJobs[i].setSampleOperator(sampleJobPair.first);


                // Configure broadcast edges for distribution map
                edge = TezCompilerUtil.connect(tezPlan, sampleJobPair.first, joinJobs[i]);
                TezCompilerUtil.configureValueOnlyTupleOutput(edge, DataMovementType.BROADCAST);
                sampleOut.addOutputKey(joinJobs[i].getOperatorKey().toString());


                // Configure skewed partitioner for join
                edge = joinJobs[2].inEdges.get(joinJobs[i].getOperatorKey());
                edge.partitionerClass = SkewedPartitionerTez.class;
            }

View Full Code Here

Examples of org.apache.pig.backend.hadoop.executionengine.tez.plan.operator.POValueOutputTez

        POForEach nfe3 = new POForEach(new OperatorKey(scope,nig.getNextNodeId(scope)), -1, ep4s, flattened3);


        oper.plan.add(nfe3);
        oper.plan.connect(nfe2, nfe3);


        POValueOutputTez sampleOut = new POValueOutputTez(OperatorKey.genOpKey(scope));
        oper.plan.add(sampleOut);
        oper.plan.connect(nfe3, sampleOut);
        oper.setClosed(true);


        oper.setRequestedParallelism(1);

View Full Code Here

Examples of org.apache.pig.backend.hadoop.executionengine.tez.plan.operator.POValueOutputTez

            lr.setOutputKey(sortOpers[0].getOperatorKey().toString());
            lrSample.setOutputKey(quantJobParallelismPair.first.getOperatorKey().toString());


            edge = TezCompilerUtil.connect(tezPlan, quantJobParallelismPair.first, sortOpers[0]);
            TezCompilerUtil.configureValueOnlyTupleOutput(edge, DataMovementType.BROADCAST);
            POValueOutputTez sampleOut = (POValueOutputTez)quantJobParallelismPair.first.plan.getLeaves().get(0);
            sampleOut.addOutputKey(sortOpers[0].getOperatorKey().toString());
            sortOpers[0].setSampleOperator(quantJobParallelismPair.first);


            edge = TezCompilerUtil.connect(tezPlan, sortOpers[0], sortOpers[1]);
            edge.partitionerClass = WeightedRangePartitionerTez.class;


            curTezOp = sortOpers[1];


            // TODO: Review sort udf
//            if(op.isUDFComparatorUsed){
//                curTezOp.UDFs.add(op.getMSortFunc().getFuncSpec().toString());
//                curTezOp.isUDFComparatorUsed = true;
//            }
            quantJobParallelismPair.first.setSortOperator(sortOpers[1]);


            // If Order by followed by Limit and parallelism of order by is not 1
            // add a new vertex for Limit with parallelism 1.
            // Equivalent of LimitAdjuster.java in MR
            if (op.isLimited() && rp != 1) {
                POValueOutputTez output = new POValueOutputTez(OperatorKey.genOpKey(scope));
                output.copyAliasFrom(op);
                sortOpers[1].plan.addAsLeaf(output);


                TezOperator limitOper = getTezOp();
                tezPlan.add(limitOper);
                curTezOp = limitOper;


                // Explicitly set the parallelism for the new vertex to 1.
                limitOper.setRequestedParallelism(1);
                limitOper.setDontEstimateParallelism(true);
                limitOper.markLimitAfterSort();


                edge = TezCompilerUtil.connect(tezPlan, sortOpers[1], limitOper);
                // LIMIT in this case should be ordered. So we output unordered with key as task index
                // and on the input we use OrderedGroupedKVInput to do ordered merge to retain sorted order.
                output.addOutputKey(limitOper.getOperatorKey().toString());
                output.setTaskIndexWithRecordIndexAsKey(true);
                edge = curTezOp.inEdges.get(sortOpers[1].getOperatorKey());
                TezCompilerUtil.configureValueOnlyTupleOutput(edge, DataMovementType.SCATTER_GATHER);
                // POValueOutputTez will write key (task index, record index) in
                // sorted order. So using UnorderedKVOutput instead of OrderedPartitionedKVOutput.
                // But input needs to be merged in sorted order and requires OrderedGroupedKVInput

View Full Code Here

Examples of org.apache.pig.backend.hadoop.executionengine.tez.plan.operator.POValueOutputTez


    @Override
    public void visitSplit(POSplit op) throws VisitorException {
        try {
            TezOperator splitOp = curTezOp;
            POValueOutputTez output = null;
            if (splitsSeen.containsKey(op.getOperatorKey())) {
                splitOp = splitsSeen.get(op.getOperatorKey());
                output = (POValueOutputTez)splitOp.plan.getLeaves().get(0);
            } else {
                splitsSeen.put(op.getOperatorKey(), splitOp);
                splitOp.setSplitter(true);
                phyToTezOpMap.put(op, splitOp);
                output = new POValueOutputTez(OperatorKey.genOpKey(scope));
                output.copyAliasFrom(op);
                splitOp.plan.addAsLeaf(output);
            }
            curTezOp = getTezOp();
            curTezOp.setSplitParent(splitOp.getOperatorKey());
            tezPlan.add(curTezOp);
            output.addOutputKey(curTezOp.getOperatorKey().toString());
            TezEdgeDescriptor edge = TezCompilerUtil.connect(tezPlan, splitOp, curTezOp);
            //TODO shared edge once support is available in Tez
            TezCompilerUtil.configureValueOnlyTupleOutput(edge, DataMovementType.ONE_TO_ONE);
            curTezOp.setRequestedParallelismByReference(splitOp);
            POValueInputTez input = new POValueInputTez(OperatorKey.genOpKey(scope));

View Full Code Here

Examples of org.apache.pig.backend.hadoop.executionengine.tez.plan.operator.POValueOutputTez

                // Some predecessors of union need not be part of the union (e.g. replicated join).
                // So mark predecessors that are input to the union operation.
                unionTezOp.addUnionPredecessor(prevTezOp.getOperatorKey());
                TezEdgeDescriptor edge = TezCompilerUtil.connect(tezPlan, prevTezOp, unionTezOp);
                TezCompilerUtil.configureValueOnlyTupleOutput(edge, DataMovementType.SCATTER_GATHER);
                outputs[i] = new POValueOutputTez(OperatorKey.genOpKey(scope));
                outputs[i].addOutputKey(unionTezOp.getOperatorKey().toString());
                unionInput.addInputKey(prevTezOp.getOperatorKey().toString());
                prevTezOp.plan.addAsLeaf(outputs[i]);
                prevTezOp.setClosed(true);
                if (prevTezOp.isUseMRMapSettings()) {

View Full Code Here

Examples of org.apache.pig.backend.hadoop.executionengine.tez.plan.operator.POValueOutputTez

    public static PhysicalPlan getUnionPredPlanFromSplit(PhysicalPlan plan, String unionOpKey) throws VisitorException {
        List<POSplit> splits = PlanHelper.getPhysicalOperators(plan, POSplit.class);
        for (POSplit split : splits) {
            for (PhysicalPlan subPlan : split.getPlans()) {
                if (subPlan.getLeaves().get(0) instanceof POValueOutputTez) {
                    POValueOutputTez out = (POValueOutputTez) subPlan.getLeaves().get(0);
                    if (out.containsOutputKey(unionOpKey)) {
                        return subPlan;
                    }
                }
            }
        }

View Full Code Here

Examples of org.apache.pig.backend.hadoop.executionengine.tez.plan.operator.POValueOutputTez


                addSubPlanPropertiesToParent(tezOp, singleSplitee);


                removeSplittee(getPlan(), tezOp, singleSplitee);
            } else {
                POValueOutputTez valueOutput = (POValueOutputTez)tezOp.plan.getLeaves().get(0);
                POSplit split = new POSplit(OperatorKey.genOpKey(valueOutput.getOperatorKey().getScope()));
                split.copyAliasFrom(valueOutput);
                for (TezOperator splitee : splittees) {
                    PhysicalOperator spliteeRoot =  splitee.plan.getRoots().get(0);
                    splitee.plan.remove(spliteeRoot);
                    split.addPlan(splitee.plan);


                    addSubPlanPropertiesToParent(tezOp, splitee);


                    removeSplittee(getPlan(), tezOp, splitee);
                    valueOutput.removeOutputKey(splitee.getOperatorKey().toString());
                }
                if (valueOutput.getTezOutputs().length > 0) {
                    // We still need valueOutput
                    PhysicalPlan phyPlan = new PhysicalPlan();
                    phyPlan.addAsLeaf(valueOutput);
                    split.addPlan(phyPlan);
                }

View Full Code Here

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.