Package org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators

Source Code of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POCounter

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators;

import java.util.List;

import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapReduceCounter;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhyPlanVisitor;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.pen.util.ExampleTuple;
import org.apache.pig.pen.util.LineageTracer;

/**
* This operator is part of the RANK operator implementation.
* It adds a local counter and a unique task id to each tuple.
* There are two modes of operation: regular and dense.
* The local counter depends on the mode of operation.
* Regular rank takes duplicate rows into account while assigning
* numbers to groups of distinct values.
* Dense rank counts the number of distinct values, without
* considering duplicate rows. The ranking considers either
* the entire tuple (row number) or a set of columns (rank by).
*
* This Physical Operator relies on a specific MR class,
* available at PigMapReduceCounter.
**/

public class POCounter extends PhysicalOperator {

    private static final long serialVersionUID = 1L;
    private static final Long ONE = 1L;

    private List<PhysicalPlan> counterPlans;
    private List<Boolean> mAscCols;

    /**
     * In case of RANK BY, it could by dense or not.
     * Being a dense rank means to assign consecutive ranks
     * to different values.
     **/
    private boolean isDenseRank = false;

    /**
     * In case of simple RANK, namely row number mode
     * which is a consecutive number assigned to each tuple.
     **/
    private boolean isRowNumber = false;

    protected static final TupleFactory mTupleFactory = TupleFactory.getInstance();

    /**
     * Local counter for tuples on the same task.
     **/
    private Long localCount = 1L;

    /**
     * Task ID to label each tuple analyzed by the corresponding task
     **/
    private String taskID = "-1";

    /**
     * Unique identifier that links POCounter and PORank,
     * through the global counter labeled with it.
     **/
    private String operationID;

    public POCounter(OperatorKey k) {
        this(k, -1, null);
    }

    public POCounter(OperatorKey k, int rp) {
        this(k, rp, null);
    }

    public POCounter(OperatorKey k, List<PhysicalOperator> inputs) {
        this(k, -1, inputs);
    }

    public POCounter(OperatorKey k, int rp, List<PhysicalOperator> inputs) {
        super(k, rp, inputs);
    }

    @SuppressWarnings({ "rawtypes", "unchecked" })
    public POCounter(OperatorKey operatorKey, int requestedParallelism,
            List inp, List<PhysicalPlan> counterPlans,
            List<Boolean> ascendingCol) {
        super(operatorKey, requestedParallelism, inp);
        this.setCounterPlans(counterPlans);
        this.setAscendingColumns(ascendingCol);
    }

    @Override
    public Tuple illustratorMarkup(Object in, Object out, int eqClassIndex) {
        if(illustrator != null){
            return new ExampleTuple((Tuple)out);
        }
        return (Tuple) out;
    }

    @Override
    public void visit(PhyPlanVisitor v) throws VisitorException {
        v.visitCounter(this);
    }

    @Override
    public Result getNextTuple() throws ExecException {
        Result inp = null;

        while (true) {
            inp = processInput();
            if (inp.returnStatus == POStatus.STATUS_EOP
                    || inp.returnStatus == POStatus.STATUS_ERR)
                break;
            if (inp.returnStatus == POStatus.STATUS_NULL) {
                continue;
            }

            return addCounterValue(inp);
        }
        return inp;
    }

    /**
     * Add current task id and local counter value.
     * @param input from the previous output
     * @return  a tuple within two values prepended to the tuple
     * the task identifier and the local counter value.
     * Local counter value could be incremented by one (is a row number or dense rank)
     * or, could be incremented by the size of the bag on the previous tuple processed
     **/
    protected Result addCounterValue(Result input) throws ExecException {
        Tuple in = (Tuple) input.result;
        Tuple out = mTupleFactory.newTuple(in.getAll().size() + 2);
        Long sizeBag = 0L;
        int positionBag, i = 2;

        // Tuples are added by two stamps before the tuple content:
        // 1.- At position 0: Current taskId
        out.set(0, getTaskId());

        // 2.- At position 1: counter value
        //On this case, each tuple is analyzed independently of the tuples grouped
        if(isRowNumber() || isDenseRank()) {

            //Only when is Dense Rank (attached to a reduce phase) it is incremented on this way
            //Otherwise, the increment is done at mapper automatically
            if(isDenseRank())
                PigMapReduceCounter.PigReduceCounter.incrementCounter(POCounter.ONE);

            out.set(1, getLocalCounter());

            //and the local incrementer is sequentially increased.
            incrementLocalCounter();

        } else if(!isDenseRank()) {
            //Standard rank: On this case is important the
            //number of tuples on the same group.
            positionBag = in.getAll().size()-1;
            if (in.getType(positionBag) == DataType.BAG) {
                sizeBag = ((org.apache.pig.data.DefaultAbstractBag)in.get(positionBag)).size();
            }

            //This value (the size of the tuples on the bag) is used to increment
            //the current global counter and
            PigMapReduceCounter.PigReduceCounter.incrementCounter(sizeBag);

            out.set(1, getLocalCounter());

            //the value for the next tuple on the current task
            addToLocalCounter(sizeBag);

        }

        for (Object o : in) {
            out.set(i++, o);
        }

        input.result = illustratorMarkup(in, out, 0);

        return input;
    }

    @Override
    public boolean supportsMultipleInputs() {
        return false;
    }

    @Override
    public boolean supportsMultipleOutputs() {
        return false;
    }

    @Override
    public String name() {
        return getAliasString() + "POCounter" + "["
        + DataType.findTypeName(resultType) + "]" + " - "
        + mKey.toString();
    }

    public void setCounterPlans(List<PhysicalPlan> counterPlans) {
        this.counterPlans = counterPlans;
    }

    public List<PhysicalPlan> getCounterPlans() {
        return counterPlans;
    }

    public void setAscendingColumns(List<Boolean> mAscCols) {
        this.mAscCols = mAscCols;
    }

    public List<Boolean> getAscendingColumns() {
        return mAscCols;
    }

    /**
     *  Initialization step into the POCounter is to set
     *  up local counter to 1.
     **/
    public void resetLocalCounter() {
        this.localCount = 1L;
    }

    /**
     *  Sequential counter used at ROW NUMBER and RANK BY DENSE mode
     **/
    public Long incrementLocalCounter() {
        return localCount++;
    }

    public void setLocalCounter(Long localCount) {
        this.localCount = localCount;
    }

    public Long getLocalCounter() {
        return this.localCount;
    }

    public void addToLocalCounter(Long sizeBag) {
        this.localCount += sizeBag;
    }

    /**
     *  Task ID: identifier of the task (map or reducer)
     **/
    public void setTaskId(String taskID) {
        this.taskID = taskID;
    }

    public String getTaskId() {
        return this.taskID;
    }

    /**
     *  Dense Rank flag
     **/
    public void setIsDenseRank(boolean isDenseRank) {
        this.isDenseRank = isDenseRank;
    }

    public boolean isDenseRank() {
        return isDenseRank;
    }

    /**
     *  Row number flag
     **/
    public void setIsRowNumber(boolean isRowNumber) {
        this.isRowNumber = isRowNumber;
    }

    public boolean isRowNumber() {
        return isRowNumber;
    }

    /**
     *  Operation ID: identifier shared within the corresponding PORank
     **/
    public void setOperationID(String operationID) {
        this.operationID = operationID;
    }

    public String getOperationID() {
        return operationID;
    }
}
TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POCounter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.