Package org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators

Source Code of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.PODemux

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators;

import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collections;
import java.util.List;

import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhyPlanVisitor;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.io.PigNullableWritable;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.VisitorException;

/**
* The MapReduce Demultiplexer operator.
* <p>
* This operator is used when merging multiple Map-Reduce splittees
* into a Map-only splitter during multi-query optimization.
* The reduce physical plans of the splittees become the inner plans
* of this operator.
* <p>
* Due to the recursive nature of multi-query optimization, this operator
* may be contained in another demux operator.
* <p>
* The predecessor of this operator must be a POMultiQueryPackage
* operator which passes the index (indicating which inner reduce plan to run)
* along with other data to this operator.   
*/
public class PODemux extends PhysicalOperator {

    private static final long serialVersionUID = 1L;   
   
    private static int idxPart = 0x7F;
   
    private static Result empty = new Result(POStatus.STATUS_NULL, null);
   
    private static Result eop = new Result(POStatus.STATUS_EOP, null);
   
    /*
     * The list of sub-plans the inner plan is composed of
     */
    private ArrayList<PhysicalPlan> myPlans = new ArrayList<PhysicalPlan>();
   
    /*
     * Flag indicating when a new pull should start
     */
    private boolean getNext = true;
   
    /*
     * Flag indicating when a new pull should start.
     * It's used only when receiving the call
     * from reducer's close() method in the streaming case.
     */
    private boolean inpEOP = false;
   
    /*
     * The leaf of the current pipeline
     */
    private PhysicalOperator curLeaf = null;

   
    /**
     * The current pipeline plan
     */
    private PhysicalPlan curPlan = null;
   
    /*
     * Indicating if this operator is in a combiner.
     * If not, this operator is in a reducer and the key
     * values must first be extracted from the tuple-wrap
     * before writing out to the disk
     */
    private boolean inCombiner = false;
   
    BitSet processedSet = new BitSet();
   
    /**
     * Constructs an operator with the specified key.
     *
     * @param k the operator key
     */
    public PODemux(OperatorKey k) {
        this(k, -1, null);
    }

    /**
     * Constructs an operator with the specified key
     * and degree of parallelism.
     * 
     * @param k the operator key
     * @param rp the degree of parallelism requested
     */
    public PODemux(OperatorKey k, int rp) {
        this(k, rp, null);
    }

    /**
     * Constructs an operator with the specified key and inputs.
     * 
     * @param k the operator key
     * @param inp the inputs that this operator will read data from
     */
    public PODemux(OperatorKey k, List<PhysicalOperator> inp) {
        this(k, -1, inp);
    }

    /**
     * Constructs an operator with the specified key,
     * degree of parallelism and inputs.
     *
     * @param k the operator key
     * @param rp the degree of parallelism requested
     * @param inp the inputs that this operator will read data from
     */
    public PODemux(OperatorKey k, int rp, List<PhysicalOperator> inp) {
        super(k, rp, inp);
    }

    @Override
    public void visit(PhyPlanVisitor v) throws VisitorException {
        v.visitDemux(this);
    }

    @Override
    public String name() {
        return getAliasString() + "Demux [" + myPlans.size() + "] "+ mKey.toString();
    }

    @Override
    public boolean supportsMultipleInputs() {
        return false;
    }

    @Override
    public boolean supportsMultipleOutputs() {
        return false;
    }
   
    /**
     * Returns the list of inner plans.
     * 
     * @return the list of the nested plans
     */
    public List<PhysicalPlan> getPlans() {
        return myPlans;
    }
   
    /**
     * Appends the specified plan at the end of the list.
     *
     * @param inPlan plan to be appended to the inner plan list
     */
   
    public void addPlan(PhysicalPlan inPlan) { 
        myPlans.add(inPlan);
        processedSet.set(myPlans.size()-1);
    }
  
    @Override
    public Result getNextTuple() throws ExecException {
       
        if (!inCombiner && this.parentPlan.endOfAllInput) {
           
            // If there is a stream in one of the inner plans,
            // there could potentially be more to process - the
            // reducer sets the flag stating that all map input has
            // been sent already and runs the pipeline one more time
            // in its close() call.
            return getStreamCloseResult();                
       
        } else {
       
            if (getNext) {
                if(curPlan != null)
                    curPlan.detachInput();
                Result inp = processInput();
               
                if (inp.returnStatus == POStatus.STATUS_EOP) {
                    return inp;
                }
            
                curLeaf = attachInputWithIndex((Tuple)inp.result);
   
                getNext = false;
            }
           
            return runPipeline(curLeaf);     
        }
    }
   
    private Result runPipeline(PhysicalOperator leaf) throws ExecException {
      
        Result res = null;
       
        while (true) {
           
            res = leaf.getNextTuple();
           
            if (res.returnStatus == POStatus.STATUS_OK ||
                    res.returnStatus == POStatus.STATUS_EOP ||
                    res.returnStatus == POStatus.STATUS_ERR) {               
                break;
            } else if (res.returnStatus == POStatus.STATUS_NULL) {
                continue;
            }
        }  
       
        if (res.returnStatus == POStatus.STATUS_EOP) {
            getNext = true;
        }
        return (res.returnStatus == POStatus.STATUS_OK || res.returnStatus == POStatus.STATUS_ERR) ? res : empty;
    }

    private Result getStreamCloseResult() throws ExecException {
        Result res = null;
      
        while (true) {
                      
            if (processedSet.cardinality() == myPlans.size()) {
                curLeaf = null;
                if(curPlan != null)
                    curPlan.detachInput();
                Result inp = processInput();
                if (inp.returnStatus == POStatus.STATUS_OK) {               
                    attachInputWithIndex((Tuple)inp.result);
                    inpEOP = false;
                } else if (inp.returnStatus == POStatus.STATUS_EOP){
                    inpEOP = true;
                } else if (inp.returnStatus == POStatus.STATUS_NULL) {
                    inpEOP = false;
                } else if (inp.returnStatus == POStatus.STATUS_ERR) {
                    return inp;
                }           
                processedSet.clear();
            }
           
            int idx = processedSet.nextClearBit(0);
            PhysicalOperator leaf = myPlans.get(idx).getLeaves().get(0);
           
            // a nested demux object is stored in multiple positions
            // of the inner plan list, corresponding to the indexes of
            // its inner plans; skip the object if it's already processed.
            if (curLeaf != null && leaf.getOperatorKey().equals(curLeaf.getOperatorKey())) {
                processedSet.set(idx++);
                if (idx < myPlans.size()) {
                    continue;
                } else {
                    res = eop;
                }
            } else {
                curLeaf = leaf;
                res = leaf.getNextTuple();
              
                if (res.returnStatus == POStatus.STATUS_EOP)  {
                    processedSet.set(idx++);       
                    if (idx < myPlans.size()) {
                        continue;
                    }
                } else {
                    break;
                }
           
            }
           
            if (!inpEOP && res.returnStatus == POStatus.STATUS_EOP) {
                continue;
            } else {
                break;
            }
        }
       
        return res;             
    }   
   
    private PhysicalOperator attachInputWithIndex(Tuple res) throws ExecException {
       
        // unwrap the first field of the tuple to get the wrapped value which
        // is expected by the inner plans, as well as the index of the associated
        // inner plan.
        PigNullableWritable fld = (PigNullableWritable)res.get(0);       
        // choose an inner plan to run based on the index set by
        // the POLocalRearrange operator and passed to this operator
        // by POMultiQueryPackage
        int index = fld.getIndex();
        index &= idxPart;                     

        curPlan = myPlans.get(index);
        if (!(curPlan.getRoots().get(0) instanceof PODemux)) {                            
            res.set(0, fld.getValueAsPigType());
        }
       
        curPlan.attachInput(res);
        return curPlan.getLeaves().get(0);
    }
   
    /**
     * Sets a flag indicating if this operator is
     * in a combiner.
     *
     * @param inCombiner true if this operator is in
     * a combiner; false if this operator is in a reducer
     */
    public void setInCombiner(boolean inCombiner) {
        this.inCombiner = inCombiner;
    }

    /**
     * Returns a flag indicating if this operator is
     * in a combiner.
     *
     * @return true if this operator is in a combiner;
     * otherwise this operator is in a reducer
     */
    public boolean isInCombiner() {
        return inCombiner;
    }
   
    @Override
    public Tuple illustratorMarkup(Object in, Object out, int eqClassIndex) {
        // nothing need to be done here
        return null;
    }
       
}
TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.PODemux

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.