Package org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators

Source Code of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POMultiQueryPackage

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

import org.apache.pig.PigException;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhyPlanVisitor;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.io.NullableTuple;
import org.apache.pig.impl.io.NullableUnknownWritable;
import org.apache.pig.impl.io.PigNullableWritable;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.backend.hadoop.HDataType;

/**
* The package operator that packages the globally rearranged tuples
* into output format as required by multi-query de-multiplexer.
* <p>
* This operator is used when merging multiple Map-Reduce splittees
* into a Map-only splitter during multi-query optimization.
* The package operators of the reduce plans of the splittees form an
* indexed package list inside this operator. When this operator
* receives an input, it extracts the index from the key and calls the
* corresponding package to get the output data.
* <p>
* Due to the recursive nature of multi-query optimization, this operator
* may be contained in another multi-query packager.
* <p>
* The successor of this operator must be a PODemux operator which
* knows how to consume the output of this operator.
*/
public class POMultiQueryPackage extends POPackage {
   
    private static final long serialVersionUID = 1L;
   
    private static int idxPart = 0x7F;

    private List<POPackage> packages = new ArrayList<POPackage>();

    /**
     * If the POLocalRearranges corresponding to the reduce plans in
     * myPlans (the list of inner plans of the demux) have different key types
     * then the MultiQueryOptimizer converts all the keys to be of type tuple
     * by wrapping any non-tuple keys into Tuples (keys which are already tuples
     * are left alone).
     * The list below is a list of booleans indicating whether extra tuple wrapping
     * was done for the key in the corresponding POLocalRearranges and if we need
     * to "unwrap" the tuple to get to the key
     */
    private ArrayList<Boolean> isKeyWrapped = new ArrayList<Boolean>();
   
    /*
     * Indicating if all the inner plans have the same
     * map key type. If not, the keys passed in are
     * wrapped inside tuples and need to be extracted
     * out during the reduce phase
     */
    private boolean sameMapKeyType = true;
   
    /*
     * Indicating if this operator is in a combiner.
     * If not, this operator is in a reducer and the key
     * values must first be extracted from the tuple-wrap
     * before writing out to the disk
     */
    private boolean inCombiner = false;
   
    transient private PigNullableWritable myKey;

    /**
     * Constructs an operator with the specified key.
     *
     * @param k the operator key
     */
    public POMultiQueryPackage(OperatorKey k) {
        this(k, -1, null);
    }

    /**
     * Constructs an operator with the specified key
     * and degree of parallelism.
     * 
     * @param k the operator key
     * @param rp the degree of parallelism requested
     */
    public POMultiQueryPackage(OperatorKey k, int rp) {
        this(k, rp, null);
    }

    /**
     * Constructs an operator with the specified key and inputs.
     * 
     * @param k the operator key
     * @param inp the inputs that this operator will read data from
     */
    public POMultiQueryPackage(OperatorKey k, List<PhysicalOperator> inp) {
        this(k, -1, inp);
    }

    /**
     * Constructs an operator with the specified key,
     * degree of parallelism and inputs.
     *
     * @param k the operator key
     * @param rp the degree of parallelism requested
     * @param inp the inputs that this operator will read data from
     */
    public POMultiQueryPackage(OperatorKey k, int rp, List<PhysicalOperator> inp) {
        super(k, rp, inp);
    }

    @Override
    public String name() {
        return "MultiQuery Package [" + isKeyWrapped + "] - " +  getOperatorKey().toString();
    }

    @Override
    public boolean supportsMultipleInputs() {
        return false;
    }

    @Override
    public void visit(PhyPlanVisitor v) throws VisitorException {
        v.visitMultiQueryPackage(this);
    }

    @Override
    public boolean supportsMultipleOutputs() {
        return false;
    }
   
    @Override
    public void attachInput(PigNullableWritable k, Iterator<NullableTuple> inp) {
        tupIter = inp;
        myKey = k;
    }

    @Override
    public void detachInput() {
        tupIter = null;
        myKey = null;
    }

    /**
     * Appends the specified package object to the end of
     * the package list.
     *
     * @param pack package to be appended to the list
     */
    public void addPackage(POPackage pack) {
        packages.add(pack);       
    }
   
    /**
     * Appends the specified package object to the end of
     * the package list.
     *
     * @param pack package to be appended to the list
     * @param mapKeyType the map key type associated with the package
     */
    public void addPackage(POPackage pack, byte mapKeyType) {
        packages.add(pack);       
        // if mapKeyType is already a tuple, we will NOT
        // be wrapping it in an extra tuple. If it is not
        // a tuple, we will wrap into in a tuple
        isKeyWrapped.add(mapKeyType == DataType.TUPLE ? false : true);
    }

    /**
     * Returns the list of packages.
     * 
     * @return the list of the packages
     */
    public List<POPackage> getPackages() {
        return packages;
    }

    /**
     * Constructs the output tuple from the inputs.
     * <p>
     * The output is consumed by for the demultiplexer operator
     * (PODemux) in the format (key, {bag of tuples}) where key
     * is an indexed WritableComparable, not the wrapped value as a pig type.
     */
    @Override
    public Result getNextTuple() throws ExecException {
       
        byte origIndex = myKey.getIndex();

        int index = (int)origIndex;
        index &= idxPart;
       
        if (index >= packages.size() || index < 0) {
            int errCode = 2140;
            String msg = "Invalid package index " + index
                + " should be in the range between 0 and " + packages.size();
            throw new ExecException(msg, errCode, PigException.BUG);
        }
                 
        POPackage pack = packages.get(index);
       
        // check to see if we need to unwrap the key. The keys may be
        // wrapped inside a tuple by LocalRearrange operator when jobs 
        // with different map key types are merged
        PigNullableWritable curKey = myKey;
        if (!sameMapKeyType && !inCombiner && isKeyWrapped.get(index)) {                                      
            Tuple tup = (Tuple)myKey.getValueAsPigType();
            curKey = HDataType.getWritableComparableTypes(tup.get(0), pack.getKeyType());
            curKey.setIndex(origIndex);
        }
           
        pack.attachInput(curKey, tupIter);

        Result res = pack.getNextTuple();
        pack.detachInput();
       
        Tuple tuple = (Tuple)res.result;

        // the object present in the first field
        // of the tuple above is the real data without
        // index information - this is because the
        // package above, extracts the real data out of
        // the PigNullableWritable object - we are going to
        // give this result tuple to a PODemux operator
        // which needs a PigNullableWritable first field so
        // it can figure out the index. Therefore we need
        // to add index to the first field of the tuple.
               
        Object obj = tuple.get(0);
        if (obj instanceof PigNullableWritable) {
            ((PigNullableWritable)obj).setIndex(origIndex);
        }
        else {
            PigNullableWritable myObj = null;
            if (obj == null) {
                myObj = new NullableUnknownWritable();
                myObj.setNull(true);
            }
            else {
                myObj = HDataType.getWritableComparableTypes(obj, HDataType.findTypeFromNullableWritable(curKey));
            }
            myObj.setIndex(origIndex);
            tuple.set(0, myObj);
        }
        // illustrator markup has been handled by "pack"
        return res;
    }

    /**
     * Returns the list of booleans that indicates if the
     * key needs to unwrapped for the corresponding plan.
     *
     * @return the list of isKeyWrapped boolean values
     */
    public List<Boolean> getIsKeyWrappedList() {
        return Collections.unmodifiableList(isKeyWrapped);
    }
   
    /**
     * Adds a list of IsKeyWrapped boolean values
     *
     * @param lst the list of boolean values to add
     */
    public void addIsKeyWrappedList(List<Boolean> lst) {
        for (Boolean b : lst) {
            isKeyWrapped.add(b);
        }
    }
   
    public void setInCombiner(boolean inCombiner) {
        this.inCombiner = inCombiner;
    }

    public boolean isInCombiner() {
        return inCombiner;
    }

    public void setSameMapKeyType(boolean sameMapKeyType) {
        this.sameMapKeyType = sameMapKeyType;
    }

    public boolean isSameMapKeyType() {
        return sameMapKeyType;
    }

}
TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POMultiQueryPackage

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.