// Source Code of org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.OpTraitsRulesProcFactory$MultiParentRule

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package org.apache.hadoop.hive.ql.optimizer.metainfo.annotation;


import java.util.ArrayList;
import java.util.List;
import java.util.Map.Entry;
import java.util.Stack;


import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.AbstractBucketJoinProc;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.OpTraits;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;


/*
 * This class populates the following operator traits for the entire operator tree:
 * 1. Bucketing columns.
 * 2. Table
 * 3. Pruned partitions
 * 
 * Bucketing columns refer not to the bucketing columns from the table object but instead
 * to the dynamic 'bucketing' done by operators such as reduce sinks and group-bys.
 * All the operators have a translation from their input names to the output names corresponding
 * to the bucketing column. The colExprMap that is a part of every operator is used in this
 * transformation.
 * 
 * The table object is used for the base-case in map-reduce when deciding to perform a bucket
 * map join. This object is used in the BucketMapJoinProc to find if the number of files for the
 * table corresponds to the number of buckets specified in the meta data.
 * 
 * The pruned partition information has the same purpose as the table object at the moment.
 * 
 * The traits of sorted-ness etc. can be populated as well for future optimizations to make use of.
 */


public class OpTraitsRulesProcFactory {


  public static class DefaultRule implements NodeProcessor {


    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      @SuppressWarnings("unchecked")
      Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>)nd;
      op.setOpTraits(op.getParentOperators().get(0).getOpTraits());
      return null;
    }


  }


  /*
   * Reduce sink operator is the de-facto operator 
   * for determining keyCols (emit keys of a map phase)
   */
  public static class ReduceSinkRule implements NodeProcessor {


    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {


      ReduceSinkOperator rs = (ReduceSinkOperator)nd;
      List<String> bucketCols = new ArrayList<String>();
      if (rs.getColumnExprMap() != null) {
        for (ExprNodeDesc exprDesc : rs.getConf().getKeyCols()) {
          for (Entry<String, ExprNodeDesc> entry : rs.getColumnExprMap().entrySet()) {
            if (exprDesc.isSame(entry.getValue())) {
              bucketCols.add(entry.getKey());
            }
          }
        }
      }


      List<List<String>> listBucketCols = new ArrayList<List<String>>();
      listBucketCols.add(bucketCols);
      OpTraits opTraits = new OpTraits(listBucketCols, -1);
      rs.setOpTraits(opTraits);
      return null;
    }
  }


  /*
   * Table scan has the table object and pruned partitions that has information such as
   * bucketing, sorting, etc. that is used later for optimization.
   */
  public static class TableScanRule implements NodeProcessor {


    public boolean checkBucketedTable(Table tbl, 
        ParseContext pGraphContext,
        PrunedPartitionList prunedParts) throws SemanticException {


      if (tbl.isPartitioned()) {
        List<Partition> partitions = prunedParts.getNotDeniedPartns();
        // construct a mapping of (Partition->bucket file names) and (Partition -> bucket number)
        if (!partitions.isEmpty()) {
          for (Partition p : partitions) {
            List<String> fileNames =
                AbstractBucketJoinProc.getBucketFilePathsOfPartition(p.getDataLocation(), pGraphContext);
            // The number of files for the table should be same as number of buckets.
            int bucketCount = p.getBucketCount();


            if (fileNames.size() != 0 && fileNames.size() != bucketCount) {
              return false;
            }
          }
        }
      } else {


        List<String> fileNames =
            AbstractBucketJoinProc.getBucketFilePathsOfPartition(tbl.getDataLocation(), pGraphContext);
        Integer num = new Integer(tbl.getNumBuckets());


        // The number of files for the table should be same as number of buckets.
        if (fileNames.size() != 0 && fileNames.size() != num) {
          return false;
        }
      }


      return true;
    }


    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      TableScanOperator ts = (TableScanOperator)nd;
      AnnotateOpTraitsProcCtx opTraitsCtx = (AnnotateOpTraitsProcCtx)procCtx;
      Table table = opTraitsCtx.getParseContext().getTopToTable().get(ts);
      PrunedPartitionList prunedPartList = null;
      try {
        prunedPartList =
            opTraitsCtx.getParseContext().getPrunedPartitions(ts.getConf().getAlias(), ts);
      } catch (HiveException e) {
        prunedPartList = null;
      }
      boolean bucketMapJoinConvertible = checkBucketedTable(table, 
          opTraitsCtx.getParseContext(), prunedPartList);
      List<List<String>>bucketCols = new ArrayList<List<String>>();
      int numBuckets = -1;
      if (bucketMapJoinConvertible) {
        bucketCols.add(table.getBucketCols());
        numBuckets = table.getNumBuckets();
      }
      OpTraits opTraits = new OpTraits(bucketCols, numBuckets);
      ts.setOpTraits(opTraits);
      return null;
    }
  }


  /*
   * Group-by re-orders the keys emitted hence, the keyCols would change.
   */
  public static class GroupByRule implements NodeProcessor {


    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      GroupByOperator gbyOp = (GroupByOperator)nd;
      List<String> gbyKeys = new ArrayList<String>();
      for (ExprNodeDesc exprDesc : gbyOp.getConf().getKeys()) {
        for (Entry<String, ExprNodeDesc> entry : gbyOp.getColumnExprMap().entrySet()) {
          if (exprDesc.isSame(entry.getValue())) {
            gbyKeys.add(entry.getKey());
          }
        }
      }


      List<List<String>> listBucketCols = new ArrayList<List<String>>();
      listBucketCols.add(gbyKeys);
      OpTraits opTraits = new OpTraits(listBucketCols, -1);
      gbyOp.setOpTraits(opTraits);
      return null;
    }
  }


  public static class SelectRule implements NodeProcessor {


    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      SelectOperator selOp = (SelectOperator)nd;
      List<List<String>> parentBucketColNames = 
          selOp.getParentOperators().get(0).getOpTraits().getBucketColNames();


      List<List<String>> listBucketCols = new ArrayList<List<String>>();
      if (selOp.getColumnExprMap() != null) {
        if (parentBucketColNames != null) {
          for (List<String> colNames : parentBucketColNames) {
            List<String> bucketColNames = new ArrayList<String>();
            for (String colName : colNames) {
              for (Entry<String, ExprNodeDesc> entry : selOp.getColumnExprMap().entrySet()) {
                if (entry.getValue() instanceof ExprNodeColumnDesc) {
                  if(((ExprNodeColumnDesc)(entry.getValue())).getColumn().equals(colName)) {
                    bucketColNames.add(entry.getKey());
                  }
                }
              }
            }
            listBucketCols.add(bucketColNames);
          }
        }
      }


      int numBuckets = -1;
      if (selOp.getParentOperators().get(0).getOpTraits() != null) {
        numBuckets = selOp.getParentOperators().get(0).getOpTraits().getNumBuckets();
      }
      OpTraits opTraits = new OpTraits(listBucketCols, numBuckets);
      selOp.setOpTraits(opTraits);
      return null;
    }
  }


  public static class JoinRule implements NodeProcessor {


    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      JoinOperator joinOp = (JoinOperator)nd;
      List<List<String>> bucketColsList = new ArrayList<List<String>>();
      byte pos = 0;
      for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) {
        if (!(parentOp instanceof ReduceSinkOperator)) {
          // can be mux operator
          break;
        }
        ReduceSinkOperator rsOp = (ReduceSinkOperator)parentOp;
        if (rsOp.getOpTraits() == null) {
          ReduceSinkRule rsRule = new ReduceSinkRule();
          rsRule.process(rsOp, stack, procCtx, nodeOutputs);
        }
        bucketColsList.add(getOutputColNames(joinOp, rsOp, pos));
        pos++;
      }


      joinOp.setOpTraits(new OpTraits(bucketColsList, -1));
      return null;
    }


    private List<String> getOutputColNames(JoinOperator joinOp,
        ReduceSinkOperator rs, byte pos) {
      List<List<String>> parentBucketColNames =
          rs.getOpTraits().getBucketColNames();


      if (parentBucketColNames != null) {
        List<String> bucketColNames = new ArrayList<String>();


        // guaranteed that there is only 1 list within this list because
        // a reduce sink always brings down the bucketing cols to a single list.
        // may not be true with correlation operators (mux-demux)
        List<String> colNames = parentBucketColNames.get(0);
        for (String colName : colNames) {
          for (ExprNodeDesc exprNode : joinOp.getConf().getExprs().get(pos)) {
            if (exprNode instanceof ExprNodeColumnDesc) {
              if(((ExprNodeColumnDesc)(exprNode)).getColumn().equals(colName)) {
                for (Entry<String, ExprNodeDesc> entry : joinOp.getColumnExprMap().entrySet()) {
                  if (entry.getValue().isSame(exprNode)) {
                    bucketColNames.add(entry.getKey());
                    // we have found the colName
                    break;
                  }
                }
              } else {
                // continue on to the next exprNode to find a match
                continue;
              }
              // we have found the colName. No need to search more exprNodes.
              break;
            }
          }
        }


        return bucketColNames;
      }


      // no col names in parent
      return null;
    }
  }


  /*
   *  When we have operators that have multiple parents, it is not
   *  clear which parent's traits we need to propagate forward.
   */
  public static class MultiParentRule implements NodeProcessor {


    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      OpTraits opTraits = new OpTraits(null, -1);
      @SuppressWarnings("unchecked")
      Operator<? extends OperatorDesc> operator = (Operator<? extends OperatorDesc>)nd;
      operator.setOpTraits(opTraits);
      return null;
    } 
  }


  public static NodeProcessor getTableScanRule() {
    return new TableScanRule();
  }


  public static NodeProcessor getReduceSinkRule() {
    return new ReduceSinkRule();
  }
  
  public static NodeProcessor getSelectRule() {
    return new SelectRule();
  }


  public static NodeProcessor getDefaultRule() {
    return new DefaultRule();
  }


  public static NodeProcessor getMultiParentRule() {
    return new MultiParentRule();
  }


  public static NodeProcessor getGroupByRule() {
    return new GroupByRule();
  }


  public static NodeProcessor getJoinRule() {
    return new JoinRule();
  }
}
// Source Code of org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.OpTraitsRulesProcFactory$MultiParentRule

// Related Classes of org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.OpTraitsRulesProcFactory$MultiParentRule