Package org.apache.hadoop.hive.ql.optimizer.correlation

Source Code of org.apache.hadoop.hive.ql.optimizer.correlation.QueryPlanTreeTransformation

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hive.ql.optimizer.correlation;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.DemuxOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.optimizer.correlation.CorrelationOptimizer.CorrelationNodeProcCtx;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.DemuxDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.plan.MuxDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;

/**
* QueryPlanTreeTransformation contains static methods used to transform
* the query plan tree (operator tree) based on the correlation we have
* detected by Correlation Optimizer.
*/
public class QueryPlanTreeTransformation {
  private static final Log LOG = LogFactory.getLog(QueryPlanTreeTransformation.class.getName());

  private static void setNewTag(IntraQueryCorrelation correlation,
      List<Operator<? extends OperatorDesc>> childrenOfDemux,
      ReduceSinkOperator rsop, Map<ReduceSinkOperator, Integer> bottomRSToNewTag)
          throws SemanticException {
    int newTag = bottomRSToNewTag.get(rsop);
    int oldTag = rsop.getConf().getTag();
    if (oldTag == -1) {
      // if this child of DemuxOperator does not use tag, we just set the oldTag to 0.
      oldTag = 0;
    }
    Operator<? extends OperatorDesc> child = CorrelationUtilities.getSingleChild(rsop, true);
    if (!childrenOfDemux.contains(child)) {
      childrenOfDemux.add(child);
    }
    int childIndex = childrenOfDemux.indexOf(child);
    correlation.setNewTag(newTag, oldTag, childIndex);
    rsop.getConf().setTag(newTag);
  }

  /**
   * Based on the correlation, we transform the query plan tree (operator tree).
   * In here, we first create DemuxOperator and all bottom ReduceSinkOperators
   * (bottom means near TableScanOperaotr) in the correlation will be be
   * the parents of the DemuxOperaotr. We also reassign tags to those
   * ReduceSinkOperators. Then, we use MuxOperators to replace ReduceSinkOperators
   * which are not bottom ones in this correlation.
   * Example: The original operator tree is ...
   *      JOIN2
   *      /    \
   *     RS4   RS5
   *    /        \
   *   GBY1     JOIN1
   *    |       /    \
   *   RS1     RS2   RS3
   * If GBY1, JOIN1, and JOIN2 can be executed in the same reducer
   * (optimized by Correlation Optimizer).
   * The new operator tree will be ...
   *      JOIN2
   *        |
   *       MUX
   *      /   \
   *    GBY1  JOIN1
   *      \    /
   *       DEMUX
   *      /  |  \
   *     /   |   \
   *    /    |    \
   *   RS1   RS2   RS3
   * @param pCtx
   * @param corrCtx
   * @param correlation
   * @throws SemanticException
   */
  protected static void applyCorrelation(
      ParseContext pCtx,
      CorrelationNodeProcCtx corrCtx,
      IntraQueryCorrelation correlation)
      throws SemanticException {

    final List<ReduceSinkOperator> bottomReduceSinkOperators =
        correlation.getBottomReduceSinkOperators();
    final int numReducers = correlation.getNumReducers();
    List<Operator<? extends OperatorDesc>> childrenOfDemux =
        new ArrayList<Operator<? extends OperatorDesc>>();
    List<Operator<? extends OperatorDesc>> parentRSsOfDemux =
        new ArrayList<Operator<? extends OperatorDesc>>();
    Map<Integer, Integer> childIndexToOriginalNumParents =
        new HashMap<Integer, Integer>();
    List<TableDesc> keysSerializeInfos = new ArrayList<TableDesc>();
    List<TableDesc> valuessSerializeInfos = new ArrayList<TableDesc>();
    Map<ReduceSinkOperator, Integer> bottomRSToNewTag =
        new HashMap<ReduceSinkOperator, Integer>();
    int newTag = 0;
    for (ReduceSinkOperator rsop: bottomReduceSinkOperators) {
      rsop.getConf().setNumReducers(numReducers);
      bottomRSToNewTag.put(rsop, newTag);
      parentRSsOfDemux.add(rsop);
      keysSerializeInfos.add(rsop.getConf().getKeySerializeInfo());
      valuessSerializeInfos.add(rsop.getConf().getValueSerializeInfo());
      Operator<? extends OperatorDesc> child = CorrelationUtilities.getSingleChild(rsop, true);
      if (!childrenOfDemux.contains(child)) {
        childrenOfDemux.add(child);
        int childIndex = childrenOfDemux.size() - 1;
        childIndexToOriginalNumParents.put(childIndex, child.getNumParent());
      }
      newTag++;
    }

    for (ReduceSinkOperator rsop: bottomReduceSinkOperators) {
      setNewTag(correlation, childrenOfDemux, rsop, bottomRSToNewTag);
    }

    // Create the DemuxOperaotr
    DemuxDesc demuxDesc =
        new DemuxDesc(
            correlation.getNewTagToOldTag(),
            correlation.getNewTagToChildIndex(),
            childIndexToOriginalNumParents,
            keysSerializeInfos,
            valuessSerializeInfos);
    Operator<? extends OperatorDesc> demuxOp = OperatorFactory.get(demuxDesc);
    demuxOp.setChildOperators(childrenOfDemux);
    demuxOp.setParentOperators(parentRSsOfDemux);
    for (Operator<? extends OperatorDesc> child: childrenOfDemux) {
      List<Operator<? extends OperatorDesc>> parentsWithMultipleDemux =
          new ArrayList<Operator<? extends OperatorDesc>>();
      boolean hasBottomReduceSinkOperators = false;
      boolean hasNonBottomReduceSinkOperators = false;
      for (int i = 0; i < child.getParentOperators().size(); i++) {
        Operator<? extends OperatorDesc> p = child.getParentOperators().get(i);
        assert p instanceof ReduceSinkOperator;
        ReduceSinkOperator rsop = (ReduceSinkOperator)p;
        if (bottomReduceSinkOperators.contains(rsop)) {
          hasBottomReduceSinkOperators = true;
          parentsWithMultipleDemux.add(demuxOp);
        } else {
          hasNonBottomReduceSinkOperators = true;
          parentsWithMultipleDemux.add(rsop);
        }
      }
      if (hasBottomReduceSinkOperators && hasNonBottomReduceSinkOperators) {
        child.setParentOperators(parentsWithMultipleDemux);
      } else {
        child.setParentOperators(Utilities.makeList(demuxOp));
      }
    }
    for (Operator<? extends OperatorDesc> parent: parentRSsOfDemux) {
      parent.setChildOperators(Utilities.makeList(demuxOp));
    }

    // replace all ReduceSinkOperators which are not at the bottom of
    // this correlation to MuxOperators
    Set<ReduceSinkOperator> handledRSs = new HashSet<ReduceSinkOperator>();
    for (ReduceSinkOperator rsop : correlation.getAllReduceSinkOperators()) {
      if (!bottomReduceSinkOperators.contains(rsop)) {
        if (handledRSs.contains(rsop)) {
          continue;
        }
        Operator<? extends OperatorDesc> childOP =
            CorrelationUtilities.getSingleChild(rsop, true);
        if (childOP instanceof GroupByOperator) {
          CorrelationUtilities.removeReduceSinkForGroupBy(
              rsop, (GroupByOperator)childOP, pCtx, corrCtx);
          List<Operator<? extends OperatorDesc>> parentsOfMux =
              new ArrayList<Operator<? extends OperatorDesc>>();
          Operator<? extends OperatorDesc> parentOp =
              CorrelationUtilities.getSingleParent(childOP, true);
          parentsOfMux.add(parentOp);
          Operator<? extends OperatorDesc> mux = OperatorFactory.get(
              new MuxDesc(parentsOfMux));
          mux.setChildOperators(Utilities.makeList(childOP));
          mux.setParentOperators(parentsOfMux);
          childOP.setParentOperators(Utilities.makeList(mux));
          parentOp.setChildOperators(Utilities.makeList(mux));
        } else {
          List<Operator<? extends OperatorDesc>> parentsOfMux =
              new ArrayList<Operator<? extends OperatorDesc>>();
          List<Operator<? extends OperatorDesc>> siblingOPs =
              CorrelationUtilities.findSiblingOperators(rsop);
          for (Operator<? extends OperatorDesc> op: siblingOPs) {
            if (op instanceof DemuxOperator) {
              parentsOfMux.add(op);
            } else if (op instanceof ReduceSinkOperator){
              GroupByOperator pGBYm =
                  CorrelationUtilities.getSingleParent(op, GroupByOperator.class);
              if (pGBYm != null && pGBYm.getConf().getMode() == GroupByDesc.Mode.HASH) {
                // We get a semi join at here.
                // This map-side GroupByOperator needs to be removed
                CorrelationUtilities.removeOperator(
                    pGBYm, op, CorrelationUtilities.getSingleParent(pGBYm, true), pCtx);
              }
              handledRSs.add((ReduceSinkOperator)op);
              parentsOfMux.add(CorrelationUtilities.getSingleParent(op, true));
            } else {
              throw new SemanticException("An slibing of ReduceSinkOperator is nethier a " +
                  "DemuxOperator nor a ReduceSinkOperator");
            }
          }
          MuxDesc muxDesc = new MuxDesc(siblingOPs);
          Operator<? extends OperatorDesc> mux = OperatorFactory.get(muxDesc);
          mux.setChildOperators(Utilities.makeList(childOP));
          mux.setParentOperators(parentsOfMux);

          for (Operator<? extends OperatorDesc> op: parentsOfMux) {
            if (op instanceof DemuxOperator) {
              // op is a DemuxOperator and it directly connects to childOP.
              // We will add this MuxOperator between DemuxOperator
              // and childOP.
              if (op.getChildOperators().contains(childOP)) {
                op.replaceChild(childOP, mux);
              }
            } else {
              // op is not a DemuxOperator, so it should have
              // a single child.
              op.setChildOperators(Utilities.makeList(mux));
            }
          }
          childOP.setParentOperators(Utilities.makeList(mux));
        }
      }
    }
    for (ReduceSinkOperator rsop: handledRSs) {
      rsop.setChildOperators(null);
      rsop.setParentOperators(null);
      pCtx.getOpParseCtx().remove(rsop);
    }
  }
}
TOP

Related Classes of org.apache.hadoop.hive.ql.optimizer.correlation.QueryPlanTreeTransformation

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.