Package org.apache.pig.backend.hadoop.executionengine.tez.util

Source Code of org.apache.pig.backend.hadoop.executionengine.tez.util.TezCompilerUtil

package org.apache.pig.backend.hadoop.executionengine.tez.util;

import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

import org.apache.pig.PigException;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POProject;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POForEach;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POPackage;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.util.PlanHelper;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.TezEdgeDescriptor;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.TezOperPlan;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.TezOperator;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.operator.POLocalRearrangeTez;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.operator.POStoreTez;
import org.apache.pig.backend.hadoop.executionengine.tez.plan.operator.POValueOutputTez;
import org.apache.pig.backend.hadoop.executionengine.tez.runtime.RoundRobinPartitioner;
import org.apache.pig.data.DataType;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.plan.NodeIdGenerator;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.PlanException;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.tez.dag.api.EdgeProperty.DataMovementType;
import org.apache.tez.runtime.library.input.UnorderedKVInput;
import org.apache.tez.runtime.library.output.UnorderedKVOutput;
import org.apache.tez.runtime.library.output.UnorderedPartitionedKVOutput;

import com.google.common.collect.Lists;

public class TezCompilerUtil {

    public static String TUPLE_CLASS = TupleFactory.getInstance().tupleClass().getName();

    private TezCompilerUtil() {
    }

    // simpleConnectTwoVertex is a utility to end a vertex equivalent to map and start vertex equivalent to
    // reduce in a tez operator:
    // 1. op1 is open
    // 2. op2 is blank
    //    POPackage to start a reduce vertex
    // 3. POLocalRearrange/POPackage are trivial
    // 4. User need to connect op1 to op2 themselves
    static public void simpleConnectTwoVertex(TezOperPlan tezPlan, TezOperator op1, TezOperator op2, String scope, NodeIdGenerator nig) throws PlanException
    {
        PhysicalPlan ep = new PhysicalPlan();
        POProject prjStar = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
        prjStar.setResultType(DataType.TUPLE);
        prjStar.setStar(true);
        ep.add(prjStar);

        List<PhysicalPlan> eps = new ArrayList<PhysicalPlan>();
        eps.add(ep);

        POLocalRearrangeTez lr = new POLocalRearrangeTez(new OperatorKey(scope,nig.getNextNodeId(scope)));
        try {
            lr.setIndex(0);
        } catch (ExecException e) {
            int errCode = 2058;
            String msg = "Unable to set index on the newly created POLocalRearrange.";
            throw new PlanException(msg, errCode, PigException.BUG, e);
        }
        lr.setKeyType(DataType.TUPLE);
        lr.setPlans(eps);
        lr.setResultType(DataType.TUPLE);
        lr.setOutputKey(op2.getOperatorKey().toString());

        op1.plan.addAsLeaf(lr);

        POPackage pkg = new POPackage(new OperatorKey(scope,nig.getNextNodeId(scope)));
        pkg.getPkgr().setKeyType(DataType.TUPLE);
        pkg.setNumInps(1);
        boolean[] inner = {false};
        pkg.getPkgr().setInner(inner);
        op2.plan.add(pkg);

        op2.plan.addAsLeaf(getForEachPlain(scope, nig));

        connect(tezPlan, op1, op2);
    }

    static public TezEdgeDescriptor connect(TezOperPlan plan, TezOperator from, TezOperator to) throws PlanException {
        plan.connect(from, to);
        if (!from.plan.isEmpty()) {
            PhysicalOperator leaf = from.plan.getLeaves().get(0);
            // It could be POStoreTez incase of sampling job in order by
            if (leaf instanceof POLocalRearrangeTez) {
                POLocalRearrangeTez lr = (POLocalRearrangeTez) leaf;
                lr.setOutputKey(to.getOperatorKey().toString());
            }
        }
        // Add edge descriptors to old and new operators
        TezEdgeDescriptor edge = new TezEdgeDescriptor();
        to.inEdges.put(from.getOperatorKey(), edge);
        from.outEdges.put(to.getOperatorKey(), edge);
        return edge;
    }

    static public void connect(TezOperPlan plan, TezOperator from, TezOperator to, TezEdgeDescriptor edge) throws PlanException {
        plan.connect(from, to);
        if (from.plan.getLeaves()!=null && !from.plan.getLeaves().isEmpty()) {
            PhysicalOperator leaf = from.plan.getLeaves().get(0);
            // It could be POStoreTez incase of sampling job in order by
            if (leaf instanceof POLocalRearrangeTez) {
                POLocalRearrangeTez lr = (POLocalRearrangeTez) leaf;
                lr.setOutputKey(to.getOperatorKey().toString());
            }
        }
        // Add edge descriptors to old and new operators
        to.inEdges.put(from.getOperatorKey(), edge);
        from.outEdges.put(to.getOperatorKey(), edge);
    }

    static public POForEach getForEach(POProject project, int rp, String scope, NodeIdGenerator nig) {
        PhysicalPlan forEachPlan = new PhysicalPlan();
        forEachPlan.add(project);

        List<PhysicalPlan> forEachPlans = Lists.newArrayList();
        forEachPlans.add(forEachPlan);

        List<Boolean> flatten = Lists.newArrayList();
        flatten.add(true);

        POForEach forEach = new POForEach(new OperatorKey(scope, nig.getNextNodeId(scope)), rp, forEachPlans, flatten);
        forEach.setResultType(DataType.BAG);
        return forEach;
    }

    // Get a plain POForEach: ForEach X generate flatten($1)
    static public POForEach getForEachPlain(String scope, NodeIdGenerator nig) {
        POProject project = new POProject(new OperatorKey(scope, nig.getNextNodeId(scope)));
        project.setResultType(DataType.TUPLE);
        project.setStar(false);
        project.setColumn(1);
        project.setOverloaded(true);
        return getForEach(project, -1, scope, nig);
    }

    static public POStore getStore(String scope, NodeIdGenerator nig) {
        POStore st = new POStoreTez(new OperatorKey(scope, nig.getNextNodeId(scope)));
        // mark store as tmp store. These could be removed by the
        // optimizer, because it wasn't the user requesting it.
        st.setIsTmpStore(true);
        return st;
    }

    static public void setCustomPartitioner(String customPartitioner, TezOperator tezOp) throws IOException {
        if (customPartitioner != null) {
            for (TezEdgeDescriptor edge : tezOp.inEdges.values()) {
                edge.partitionerClass = PigContext.resolveClassName(customPartitioner);
            }
        }
    }

    // Used with POValueOutputTez
    static public void configureValueOnlyTupleOutput(TezEdgeDescriptor edge, DataMovementType dataMovementType) {
        edge.dataMovementType = dataMovementType;
        if (dataMovementType == DataMovementType.BROADCAST || dataMovementType == DataMovementType.ONE_TO_ONE) {
            edge.outputClassName = UnorderedKVOutput.class.getName();
            edge.inputClassName = UnorderedKVInput.class.getName();
        } else if (dataMovementType == DataMovementType.SCATTER_GATHER) {
            edge.outputClassName = UnorderedPartitionedKVOutput.class.getName();
            edge.inputClassName = UnorderedKVInput.class.getName();
            edge.partitionerClass = RoundRobinPartitioner.class;
        }
        edge.setIntermediateOutputKeyClass(POValueOutputTez.EmptyWritable.class.getName());
        edge.setIntermediateOutputValueClass(TUPLE_CLASS);
    }

    /**
     * Returns true if there are no loads or stores in a TezOperator.
     * To be called only after LoaderProcessor is called
     */
    static public boolean isIntermediateReducer(TezOperator tezOper) throws VisitorException {
        boolean intermediateReducer = false;
        LinkedList<POStore> stores = PlanHelper.getPhysicalOperators(tezOper.plan, POStore.class);
        // Not map and not final reducer
        if (stores.size() <= 0 &&
                (tezOper.getLoaderInfo().getLoads() == null || tezOper.getLoaderInfo().getLoads().size() <= 0)) {
            intermediateReducer = true;
        }
        return intermediateReducer;
    }

}
TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.tez.util.TezCompilerUtil

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.