/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.parse;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.ConditionalTask;
import org.apache.hadoop.hive.ql.exec.ExecDriver;
import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.FunctionInfo;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.MapRedTask;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.RecordReader;
import org.apache.hadoop.hive.ql.exec.RecordWriter;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.metadata.DummyPartition;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.HiveUtils;
import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1;
import org.apache.hadoop.hive.ql.optimizer.GenMROperator;
import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext;
import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx;
import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink1;
import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink2;
import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink3;
import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink4;
import org.apache.hadoop.hive.ql.optimizer.GenMRTableScan1;
import org.apache.hadoop.hive.ql.optimizer.GenMRUnion1;
import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
import org.apache.hadoop.hive.ql.optimizer.MapJoinFactory;
import org.apache.hadoop.hive.ql.optimizer.Optimizer;
import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalOptimizer;
import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec.SpecType;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.CreateTableDesc;
import org.apache.hadoop.hive.ql.plan.CreateTableLikeDesc;
import org.apache.hadoop.hive.ql.plan.CreateViewDesc;
import org.apache.hadoop.hive.ql.plan.DDLWork;
import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
import org.apache.hadoop.hive.ql.plan.ExtractDesc;
import org.apache.hadoop.hive.ql.plan.FetchWork;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc;
import org.apache.hadoop.hive.ql.plan.ForwardDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
import org.apache.hadoop.hive.ql.plan.JoinDesc;
import org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc;
import org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc;
import org.apache.hadoop.hive.ql.plan.LimitDesc;
import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.MoveWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.ScriptDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.plan.UDTFDesc;
import org.apache.hadoop.hive.ql.plan.UnionDesc;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.ResourceType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.mapred.InputFormat;
/**
* Implementation of the semantic analyzer.
*/
public class SemanticAnalyzer extends BaseSemanticAnalyzer {
private HashMap<TableScanOperator, ExprNodeDesc> opToPartPruner;
private HashMap<TableScanOperator, PrunedPartitionList> opToPartList;
private HashMap<String, Operator<? extends Serializable>> topOps;
private HashMap<String, Operator<? extends Serializable>> topSelOps;
private LinkedHashMap<Operator<? extends Serializable>, OpParseContext> opParseCtx;
private List<LoadTableDesc> loadTableWork;
private List<LoadFileDesc> loadFileWork;
private Map<JoinOperator, QBJoinTree> joinContext;
private final HashMap<TableScanOperator, Table> topToTable;
private QB qb;
private ASTNode ast;
private int destTableId;
private UnionProcContext uCtx;
List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinOpsNoReducer;
private HashMap<TableScanOperator, sampleDesc> opToSamplePruner;
Map<GroupByOperator, Set<String>> groupOpToInputTables;
Map<String, PrunedPartitionList> prunedPartitions;
private List<FieldSchema> resultSchema;
private CreateViewDesc createVwDesc;
private ASTNode viewSelect;
private final UnparseTranslator unparseTranslator;
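/**
 * Phase-1 bookkeeping: the name of the destination clause currently being
 * processed and a counter used to generate the next "insclause-" name.
 */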
private static class Phase1Ctx {
String dest;
int nextNum;
}
public SemanticAnalyzer(HiveConf conf) throws SemanticException {
super(conf);
opToPartPruner = new HashMap<TableScanOperator, ExprNodeDesc>();
opToPartList = new HashMap<TableScanOperator, PrunedPartitionList>();
opToSamplePruner = new HashMap<TableScanOperator, sampleDesc>();
topOps = new HashMap<String, Operator<? extends Serializable>>();
topSelOps = new HashMap<String, Operator<? extends Serializable>>();
loadTableWork = new ArrayList<LoadTableDesc>();
loadFileWork = new ArrayList<LoadFileDesc>();
opParseCtx = new LinkedHashMap<Operator<? extends Serializable>, OpParseContext>();
joinContext = new HashMap<JoinOperator, QBJoinTree>();
topToTable = new HashMap<TableScanOperator, Table>();
destTableId = 1;
uCtx = null;
listMapJoinOpsNoReducer = new ArrayList<AbstractMapJoinOperator<? extends MapJoinDesc>>();
groupOpToInputTables = new HashMap<GroupByOperator, Set<String>>();
prunedPartitions = new HashMap<String, PrunedPartitionList>();
unparseTranslator = new UnparseTranslator();
}
@Override
protected void reset() {
super.reset();
loadTableWork.clear();
loadFileWork.clear();
topOps.clear();
topSelOps.clear();
destTableId = 1;
idToTableNameMap.clear();
qb = null;
ast = null;
uCtx = null;
joinContext.clear();
opParseCtx.clear();
groupOpToInputTables.clear();
prunedPartitions.clear();
}
public void init(ParseContext pctx) {
opToPartPruner = pctx.getOpToPartPruner();
opToPartList = pctx.getOpToPartList();
opToSamplePruner = pctx.getOpToSamplePruner();
topOps = pctx.getTopOps();
topSelOps = pctx.getTopSelOps();
opParseCtx = pctx.getOpParseCtx();
loadTableWork = pctx.getLoadTableWork();
loadFileWork = pctx.getLoadFileWork();
joinContext = pctx.getJoinContext();
ctx = pctx.getContext();
destTableId = pctx.getDestTableId();
idToTableNameMap = pctx.getIdToTableNameMap();
uCtx = pctx.getUCtx();
listMapJoinOpsNoReducer = pctx.getListMapJoinOpsNoReducer();
qb = pctx.getQB();
groupOpToInputTables = pctx.getGroupOpToInputTables();
prunedPartitions = pctx.getPrunedPartitions();
setLineageInfo(pctx.getLineageInfo());
}
public ParseContext getParseContext() {
return new ParseContext(conf, qb, ast, opToPartPruner, opToPartList, topOps,
topSelOps, opParseCtx, joinContext, topToTable, loadTableWork,
loadFileWork, ctx, idToTableNameMap, destTableId, uCtx,
listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions,
opToSamplePruner);
}
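/**
 * Recursively performs phase 1 on a query expression: a TOK_QUERY becomes a
 * leaf QB, while a TOK_UNION is split into two sub-QBExprs, one per branch.
 */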
@SuppressWarnings("nls")
public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias)
throws SemanticException {
assert (ast.getToken() != null);
switch (ast.getToken().getType()) {
case HiveParser.TOK_QUERY: {
QB qb = new QB(id, alias, true);
doPhase1(ast, qb, initPhase1Ctx());
qbexpr.setOpcode(QBExpr.Opcode.NULLOP);
qbexpr.setQB(qb);
}
break;
case HiveParser.TOK_UNION: {
qbexpr.setOpcode(QBExpr.Opcode.UNION);
// query 1
assert (ast.getChild(0) != null);
QBExpr qbexpr1 = new QBExpr(alias + "-subquery1");
doPhase1QBExpr((ASTNode) ast.getChild(0), qbexpr1, id + "-subquery1",
alias + "-subquery1");
qbexpr.setQBExpr1(qbexpr1);
// query 2
assert (ast.getChild(1) != null);
QBExpr qbexpr2 = new QBExpr(alias + "-subquery2");
doPhase1QBExpr((ASTNode) ast.getChild(1), qbexpr2, id + "-subquery2",
alias + "-subquery2");
qbexpr.setQBExpr2(qbexpr2);
}
break;
}
}
private LinkedHashMap<String, ASTNode> doPhase1GetAggregationsFromSelect(
ASTNode selExpr) {
// Iterate over the select expressions, searching for aggregation trees.
// Use String as keys to eliminate duplicate trees.
LinkedHashMap<String, ASTNode> aggregationTrees = new LinkedHashMap<String, ASTNode>();
for (int i = 0; i < selExpr.getChildCount(); ++i) {
ASTNode sel = (ASTNode) selExpr.getChild(i).getChild(0);
doPhase1GetAllAggregations(sel, aggregationTrees);
}
return aggregationTrees;
}
private void doPhase1GetColumnAliasesFromSelect(
ASTNode selectExpr, QBParseInfo qbp) {
for (int i = 0; i < selectExpr.getChildCount(); ++i) {
ASTNode selExpr = (ASTNode) selectExpr.getChild(i);
if ((selExpr.getToken().getType() == HiveParser.TOK_SELEXPR) && (selExpr.getChildCount() == 2)) {
String columnAlias = unescapeIdentifier(selExpr.getChild(1).getText());
qbp.setExprToColumnAlias((ASTNode) selExpr.getChild(0), columnAlias);
}
}
}
/**
* DFS-scan the expressionTree to find all aggregation subtrees and put them
* in aggregations.
*
* @param expressionTree
* @param aggregations
* the key to the HashTable is the toStringTree() representation of
* the aggregation subtree.
*/
private void doPhase1GetAllAggregations(ASTNode expressionTree,
HashMap<String, ASTNode> aggregations) {
int exprTokenType = expressionTree.getToken().getType();
if (exprTokenType == HiveParser.TOK_FUNCTION
|| exprTokenType == HiveParser.TOK_FUNCTIONDI
|| exprTokenType == HiveParser.TOK_FUNCTIONSTAR) {
assert (expressionTree.getChildCount() != 0);
if (expressionTree.getChild(0).getType() == HiveParser.Identifier) {
String functionName = unescapeIdentifier(expressionTree.getChild(0)
.getText());
if (FunctionRegistry.getGenericUDAFResolver(functionName) != null) {
aggregations.put(expressionTree.toStringTree(), expressionTree);
FunctionInfo fi = FunctionRegistry.getFunctionInfo(functionName);
if (!fi.isNative()) {
unparseTranslator.addIdentifierTranslation((ASTNode) expressionTree
.getChild(0));
}
return;
}
}
}
for (int i = 0; i < expressionTree.getChildCount(); i++) {
doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(i),
aggregations);
}
}
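/**
 * Collects the aggregation trees that use DISTINCT (TOK_FUNCTIONDI),
 * e.g. count(DISTINCT key), from the given map of aggregation trees.
 */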
private List<ASTNode> doPhase1GetDistinctFuncExprs(
HashMap<String, ASTNode> aggregationTrees) throws SemanticException {
List<ASTNode> exprs = new ArrayList<ASTNode>();
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
assert (value != null);
if (value.getToken().getType() == HiveParser.TOK_FUNCTIONDI) {
exprs.add(value);
}
}
return exprs;
}
/**
* Goes through the tabref tree and finds the alias for the table. Once found,
* it records the alias -> table name association in aliasToTabs. It also makes
* an association from the alias to the table AST in parse info.
*
* @return the alias of the table
*/
private String processTable(QB qb, ASTNode tabref) throws SemanticException {
// For each table reference get the table name
// and the alias (if alias is not present, the table name
// is used as an alias)
boolean tableSamplePresent = false;
int aliasIndex = 0;
if (tabref.getChildCount() == 2) {
// tablename tablesample
// OR
// tablename alias
ASTNode ct = (ASTNode) tabref.getChild(1);
if (ct.getToken().getType() == HiveParser.TOK_TABLESAMPLE) {
tableSamplePresent = true;
} else {
aliasIndex = 1;
}
} else if (tabref.getChildCount() == 3) {
// table name table sample alias
aliasIndex = 2;
tableSamplePresent = true;
}
ASTNode tableTree = (ASTNode) (tabref.getChild(0));
String alias = unescapeIdentifier(tabref.getChild(aliasIndex).getText());
// If the alias is already there then we have a conflict
if (qb.exists(alias)) {
throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(tabref
.getChild(aliasIndex)));
}
if (tableSamplePresent) {
ASTNode sampleClause = (ASTNode) tabref.getChild(1);
ArrayList<ASTNode> sampleCols = new ArrayList<ASTNode>();
if (sampleClause.getChildCount() > 2) {
for (int i = 2; i < sampleClause.getChildCount(); i++) {
sampleCols.add((ASTNode) sampleClause.getChild(i));
}
}
// TODO: For now we only support sampling on up to two columns.
// This needs to change to support a list of columns.
if (sampleCols.size() > 2) {
throw new SemanticException(ErrorMsg.SAMPLE_RESTRICTION.getMsg(tabref
.getChild(0)));
}
qb.getParseInfo().setTabSample(
alias,
new TableSample(
unescapeIdentifier(sampleClause.getChild(0).getText()),
unescapeIdentifier(sampleClause.getChild(1).getText()),
sampleCols));
if (unparseTranslator.isEnabled()) {
for (ASTNode sampleCol : sampleCols) {
unparseTranslator.addIdentifierTranslation((ASTNode) sampleCol
.getChild(0));
}
}
}
// Record the alias -> table name mapping in the query block
String table_name = unescapeIdentifier(tabref.getChild(0).getText());
qb.setTabAlias(alias, table_name);
qb.addAlias(alias);
qb.getParseInfo().setSrcForAlias(alias, tableTree);
unparseTranslator.addIdentifierTranslation(tableTree);
if (aliasIndex != 0) {
unparseTranslator.addIdentifierTranslation((ASTNode) tabref
.getChild(aliasIndex));
}
return alias;
}
private String processSubQuery(QB qb, ASTNode subq) throws SemanticException {
// This is a subquery and must have an alias
if (subq.getChildCount() != 2) {
throw new SemanticException(ErrorMsg.NO_SUBQUERY_ALIAS.getMsg(subq));
}
ASTNode subqref = (ASTNode) subq.getChild(0);
String alias = unescapeIdentifier(subq.getChild(1).getText());
// Recursively do the first phase of semantic analysis for the subquery
QBExpr qbexpr = new QBExpr(alias);
doPhase1QBExpr(subqref, qbexpr, qb.getId(), alias);
// If the alias is already there then we have a conflict
if (qb.exists(alias)) {
throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(subq
.getChild(1)));
}
// Record the alias -> subquery mapping in the query block
qb.setSubqAlias(alias, qbexpr);
qb.addAlias(alias);
unparseTranslator.addIdentifierTranslation((ASTNode) subq.getChild(1));
return alias;
}
private boolean isJoinToken(ASTNode node) {
if ((node.getToken().getType() == HiveParser.TOK_JOIN)
|| (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN)
|| (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN)
|| (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN)
|| (node.getToken().getType() == HiveParser.TOK_LEFTSEMIJOIN)
|| (node.getToken().getType() == HiveParser.TOK_UNIQUEJOIN)) {
return true;
}
return false;
}
/**
* Given the AST with TOK_JOIN as the root, get all the aliases for the tables
* or subqueries in the join.
*
* @param qb
* @param join
* @throws SemanticException
*/
@SuppressWarnings("nls")
private void processJoin(QB qb, ASTNode join) throws SemanticException {
int numChildren = join.getChildCount();
if ((numChildren != 2) && (numChildren != 3)
&& join.getToken().getType() != HiveParser.TOK_UNIQUEJOIN) {
throw new SemanticException("Join with multiple children");
}
for (int num = 0; num < numChildren; num++) {
ASTNode child = (ASTNode) join.getChild(num);
if (child.getToken().getType() == HiveParser.TOK_TABREF) {
processTable(qb, child);
} else if (child.getToken().getType() == HiveParser.TOK_SUBQUERY) {
processSubQuery(qb, child);
} else if (child.getToken().getType() == HiveParser.TOK_LATERAL_VIEW) {
// SELECT * FROM src1 LATERAL VIEW udtf() AS myTable JOIN src2 ...
// is not supported. Instead, the lateral view must be in a subquery
// SELECT * FROM (SELECT * FROM src1 LATERAL VIEW udtf() AS myTable) a
// JOIN src2 ...
throw new SemanticException(ErrorMsg.LATERAL_VIEW_WITH_JOIN
.getMsg(join));
} else if (isJoinToken(child)) {
processJoin(qb, child);
}
}
}
/**
* Given the AST with TOK_LATERAL_VIEW as the root, get the alias for the
* table or subquery in the lateral view and also make a mapping from the
* alias to all the lateral view AST's.
*
* @param qb
* @param lateralView
* @return the alias for the table/subquery
* @throws SemanticException
*/
private String processLateralView(QB qb, ASTNode lateralView)
throws SemanticException {
int numChildren = lateralView.getChildCount();
assert (numChildren == 2);
ASTNode next = (ASTNode) lateralView.getChild(1);
String alias = null;
switch (next.getToken().getType()) {
case HiveParser.TOK_TABREF:
alias = processTable(qb, next);
break;
case HiveParser.TOK_SUBQUERY:
alias = processSubQuery(qb, next);
break;
case HiveParser.TOK_LATERAL_VIEW:
alias = processLateralView(qb, next);
break;
default:
throw new SemanticException(ErrorMsg.LATERAL_VIEW_INVALID_CHILD
.getMsg(lateralView));
}
alias = alias.toLowerCase();
qb.getParseInfo().addLateralViewForAlias(alias, lateralView);
qb.addAlias(alias);
return alias;
}
/**
* Phase 1: (including, but not limited to):
*
* 1. Gets all the aliases for all the tables / subqueries and makes the
* appropriate mapping in aliasToTabs, aliasToSubq.
* 2. Gets the location of the destination and names the clause
* "insclause-" + i.
* 3. Creates a map from a string representation of an aggregation tree to
* the actual aggregation AST.
* 4. Creates a mapping from the clause name to the select expression AST in
* destToSelExpr.
* 5. Creates a mapping from a table alias to the lateral view AST's in
* aliasToLateralViews.
*
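* For example (illustrative), for the query
* "FROM src INSERT OVERWRITE TABLE dest SELECT key, count(1) GROUP BY key",
* phase 1 names the destination clause "insclause-" + n, records the select
* and group-by ASTs under that clause name, and stores the count(1)
* aggregation tree keyed by its string representation.
*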
* @param ast
* @param qb
* @param ctx_1
* @throws SemanticException
*/
@SuppressWarnings({"fallthrough", "nls"})
public void doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1)
throws SemanticException {
QBParseInfo qbp = qb.getParseInfo();
boolean skipRecursion = false;
if (ast.getToken() != null) {
skipRecursion = true;
switch (ast.getToken().getType()) {
case HiveParser.TOK_SELECTDI:
qb.countSelDi();
// fall through
case HiveParser.TOK_SELECT:
qb.countSel();
qbp.setSelExprForClause(ctx_1.dest, ast);
if (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_HINTLIST) {
qbp.setHints((ASTNode) ast.getChild(0));
}
LinkedHashMap<String, ASTNode> aggregations = doPhase1GetAggregationsFromSelect(ast);
doPhase1GetColumnAliasesFromSelect(ast, qbp);
qbp.setAggregationExprsForClause(ctx_1.dest, aggregations);
qbp.setDistinctFuncExprsForClause(ctx_1.dest,
doPhase1GetDistinctFuncExprs(aggregations));
break;
case HiveParser.TOK_WHERE:
qbp.setWhrExprForClause(ctx_1.dest, ast);
break;
case HiveParser.TOK_DESTINATION:
ctx_1.dest = "insclause-" + ctx_1.nextNum;
ctx_1.nextNum++;
// is there an insert in the subquery?
if (qbp.getIsSubQ()) {
ASTNode ch = (ASTNode) ast.getChild(0);
if ((ch.getToken().getType() != HiveParser.TOK_DIR)
|| (((ASTNode) ch.getChild(0)).getToken().getType() != HiveParser.TOK_TMP_FILE)) {
throw new SemanticException(ErrorMsg.NO_INSERT_INSUBQUERY
.getMsg(ast));
}
}
qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0));
break;
case HiveParser.TOK_FROM:
int child_count = ast.getChildCount();
if (child_count != 1) {
throw new SemanticException("Multiple Children " + child_count);
}
// Check if this is a subquery / lateral view
ASTNode frm = (ASTNode) ast.getChild(0);
if (frm.getToken().getType() == HiveParser.TOK_TABREF) {
processTable(qb, frm);
} else if (frm.getToken().getType() == HiveParser.TOK_SUBQUERY) {
processSubQuery(qb, frm);
} else if (frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW) {
processLateralView(qb, frm);
} else if (isJoinToken(frm)) {
processJoin(qb, frm);
qbp.setJoinExpr(frm);
}
break;
case HiveParser.TOK_CLUSTERBY:
// Get the clusterby aliases - these are aliased to the entries in the
// select list
qbp.setClusterByExprForClause(ctx_1.dest, ast);
break;
case HiveParser.TOK_DISTRIBUTEBY:
// Get the distribute by aliases - these are aliased to the entries in the
// select list
qbp.setDistributeByExprForClause(ctx_1.dest, ast);
if (qbp.getClusterByForClause(ctx_1.dest) != null) {
throw new SemanticException(ErrorMsg.CLUSTERBY_DISTRIBUTEBY_CONFLICT
.getMsg(ast));
} else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
throw new SemanticException(ErrorMsg.ORDERBY_DISTRIBUTEBY_CONFLICT
.getMsg(ast));
}
break;
case HiveParser.TOK_SORTBY:
// Get the sort by aliases - these are aliased to the entries in the
// select list
qbp.setSortByExprForClause(ctx_1.dest, ast);
if (qbp.getClusterByForClause(ctx_1.dest) != null) {
throw new SemanticException(ErrorMsg.CLUSTERBY_SORTBY_CONFLICT
.getMsg(ast));
} else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
throw new SemanticException(ErrorMsg.ORDERBY_SORTBY_CONFLICT
.getMsg(ast));
}
break;
case HiveParser.TOK_ORDERBY:
// Get the order by aliases - these are aliased to the entries in the
// select list
qbp.setOrderByExprForClause(ctx_1.dest, ast);
if (qbp.getClusterByForClause(ctx_1.dest) != null) {
throw new SemanticException(ErrorMsg.CLUSTERBY_ORDERBY_CONFLICT
.getMsg(ast));
}
break;
case HiveParser.TOK_GROUPBY:
// Get the groupby aliases - these are aliased to the entries in the
// select list
if (qbp.getSelForClause(ctx_1.dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
throw new SemanticException(ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY
.getMsg(ast));
}
qbp.setGroupByExprForClause(ctx_1.dest, ast);
skipRecursion = true;
break;
case HiveParser.TOK_HAVING:
qbp.setHavingExprForClause(ctx_1.dest, ast);
qbp.addAggregationExprsForClause(ctx_1.dest, doPhase1GetAggregationsFromSelect(ast));
break;
case HiveParser.TOK_LIMIT:
qbp.setDestLimit(ctx_1.dest, new Integer(ast.getChild(0).getText()));
break;
case HiveParser.TOK_ANALYZE:
// Case of analyze command
String table_name = unescapeIdentifier(ast.getChild(0).getChild(0).getText());
qb.setTabAlias(table_name, table_name);
qb.addAlias(table_name);
qb.getParseInfo().setIsAnalyzeCommand(true);
// Allow analyzing the whole table as well as dynamic partitions
HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
HiveConf.setVar(conf, HiveConf.ConfVars.HIVEMAPREDMODE, "nonstrict");
break;
case HiveParser.TOK_UNION:
// currently, we don't support subq1 union subq2 - the user has to
// explicitly say:
// select * from (subq1 union subq2) subqalias
if (!qbp.getIsSubQ()) {
throw new SemanticException(ErrorMsg.UNION_NOTIN_SUBQ.getMsg());
}
default:
skipRecursion = false;
break;
}
}
if (!skipRecursion) {
// Iterate over the rest of the children
int child_count = ast.getChildCount();
for (int child_pos = 0; child_pos < child_count; ++child_pos) {
// Recurse
doPhase1((ASTNode) ast.getChild(child_pos), qb, ctx_1);
}
}
}
private void getMetaData(QBExpr qbexpr) throws SemanticException {
if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) {
getMetaData(qbexpr.getQB());
} else {
getMetaData(qbexpr.getQBExpr1());
getMetaData(qbexpr.getQBExpr2());
}
}
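/**
 * Resolves metadata for the given query block: fetches source tables from
 * the metastore, expands view references into subqueries, recurses into
 * subquery expressions, and resolves destination tables, partitions and
 * directories (allocating temporary output locations where needed).
 */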
@SuppressWarnings("nls")
public void getMetaData(QB qb) throws SemanticException {
try {
LOG.info("Get metadata for source tables");
// Go over the tables and populate the related structures.
// We have to materialize the table alias list since we might
// modify it in the middle for view rewrite.
List<String> tabAliases = new ArrayList<String>(qb.getTabAliases());
for (String alias : tabAliases) {
String tab_name = qb.getTabNameForAlias(alias);
Table tab = null;
try {
tab = db.getTable(tab_name);
} catch (InvalidTableException ite) {
throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(qb
.getParseInfo().getSrcForAlias(alias)));
}
// We check the offline status of the table here because, if a user only
// selects from a non-existent partition of an offline table, the partition
// won't be added to inputs and validate() won't have the information to
// check the table's offline status.
// TODO: Modify the code to remove the checking here and consolidate
// it in validate()
//
if (tab.isOffline()) {
throw new SemanticException(ErrorMsg.OFFLINE_TABLE_OR_PARTITION.
getMsg("Table " + qb.getParseInfo().getSrcForAlias(alias)));
}
if (tab.isView()) {
replaceViewReferenceWithDefinition(qb, tab, tab_name, alias);
continue;
}
if (!InputFormat.class.isAssignableFrom(tab.getInputFormatClass())) {
throw new SemanticException(ErrorMsg.INVALID_INPUT_FORMAT_TYPE
.getMsg(qb.getParseInfo().getSrcForAlias(alias)));
}
qb.getMetaData().setSrcForAlias(alias, tab);
if (qb.getParseInfo().isAnalyzeCommand()) {
tableSpec ts = new tableSpec(db, conf, (ASTNode) ast.getChild(0));
if (ts.specType == SpecType.DYNAMIC_PARTITION) { // dynamic partitions
try {
ts.partitions = db.getPartitionsByNames(ts.tableHandle, ts.partSpec);
} catch (HiveException e) {
throw new SemanticException("Cannot get partitions for " + ts.partSpec, e);
}
}
qb.getParseInfo().addTableSpec(alias, ts);
}
}
LOG.info("Get metadata for subqueries");
// Go over the subqueries and getMetaData for these
for (String alias : qb.getSubqAliases()) {
QBExpr qbexpr = qb.getSubqForAlias(alias);
getMetaData(qbexpr);
}
LOG.info("Get metadata for destination tables");
// Go over all the destination structures and populate the related
// metadata
QBParseInfo qbp = qb.getParseInfo();
for (String name : qbp.getClauseNamesForDest()) {
ASTNode ast = qbp.getDestForClause(name);
switch (ast.getToken().getType()) {
case HiveParser.TOK_TAB: {
tableSpec ts = new tableSpec(db, conf, ast);
if (ts.tableHandle.isView()) {
throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
}
Class<?> outputFormatClass = ts.tableHandle.getOutputFormatClass();
if (!HiveOutputFormat.class.isAssignableFrom(outputFormatClass)) {
throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE
.getMsg(ast, "The class is " + outputFormatClass.toString()));
}
// The tableSpec ts is derived from the query (user specified), which
// means the user didn't specify partitions in their query, but whether
// the table itself is partitioned is not known.
if (ts.specType != SpecType.STATIC_PARTITION) {
// This is a table or dynamic partition
qb.getMetaData().setDestForAlias(name, ts.tableHandle);
// has dynamic as well as static partitions
if (ts.partSpec != null && ts.partSpec.size() > 0) {
qb.getMetaData().setPartSpecForAlias(name, ts.partSpec);
}
} else {
// This is a partition
qb.getMetaData().setDestForAlias(name, ts.partHandle);
}
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
// Set that variable to automatically collect stats during the MapReduce job
qb.getParseInfo().setIsInsertToTable(true);
// Add the table spec for the destination table.
qb.getParseInfo().addTableSpec(ts.tableName.toLowerCase(), ts);
}
break;
}
case HiveParser.TOK_LOCAL_DIR:
case HiveParser.TOK_DIR: {
// This is a dfs file
String fname = stripQuotes(ast.getChild(0).getText());
if ((!qb.getParseInfo().getIsSubQ())
&& (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_TMP_FILE)) {
if (qb.isCTAS()) {
qb.setIsQuery(false);
ctx.setResDir(null);
ctx.setResFile(null);
// allocate a temporary output dir on the location of the table
String location = conf.getVar(HiveConf.ConfVars.METASTOREWAREHOUSE);
try {
fname = ctx.getExternalTmpFileURI(
FileUtils.makeQualified(new Path(location), conf).toUri());
} catch (Exception e) {
throw new SemanticException("Error creating temporary folder on: " + location, e);
}
} else {
qb.setIsQuery(true);
fname = ctx.getMRTmpFileURI();
ctx.setResDir(new Path(fname));
}
}
qb.getMetaData().setDestForAlias(name, fname,
(ast.getToken().getType() == HiveParser.TOK_DIR));
break;
}
default:
throw new SemanticException("Unknown Token Type "
+ ast.getToken().getType());
}
}
} catch (HiveException e) {
// Has to use full name to make sure it does not conflict with
// org.apache.commons.lang.StringUtils
LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
throw new SemanticException(e.getMessage(), e);
}
}
private void replaceViewReferenceWithDefinition(QB qb, Table tab,
String tab_name, String alias) throws SemanticException {
ParseDriver pd = new ParseDriver();
ASTNode viewTree;
final ASTNodeOrigin viewOrigin = new ASTNodeOrigin("VIEW", tab.getTableName(),
tab.getViewExpandedText(), alias, qb.getParseInfo().getSrcForAlias(
alias));
try {
String viewText = tab.getViewExpandedText();
// Reparse text, passing null for context to avoid clobbering
// the top-level token stream.
ASTNode tree = pd.parse(viewText, null);
tree = ParseUtils.findRootNonNullToken(tree);
viewTree = tree;
Dispatcher nodeOriginDispatcher = new Dispatcher() {
public Object dispatch(Node nd, java.util.Stack<Node> stack,
Object... nodeOutputs) {
((ASTNode) nd).setOrigin(viewOrigin);
return null;
}
};
GraphWalker nodeOriginTagger = new DefaultGraphWalker(
nodeOriginDispatcher);
nodeOriginTagger.startWalking(java.util.Collections
.<Node> singleton(viewTree), null);
} catch (ParseException e) {
// A user could encounter this if a stored view definition contains
// an old SQL construct which has been eliminated in a later Hive
// version, so we need to provide full debugging info to help
// with fixing the view definition.
LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
StringBuilder sb = new StringBuilder();
sb.append(e.getMessage());
ErrorMsg.renderOrigin(sb, viewOrigin);
throw new SemanticException(sb.toString(), e);
}
QBExpr qbexpr = new QBExpr(alias);
doPhase1QBExpr(viewTree, qbexpr, qb.getId(), alias);
qb.rewriteViewToSubq(alias, tab_name, qbexpr);
}
private boolean isPresent(String[] list, String elem) {
for (String s : list) {
if (s.equals(elem)) {
return true;
}
}
return false;
}
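/**
 * Walks a join-condition expression and records, for each table or column
 * reference, whether it belongs to the left or the right side of the join
 * (appending to leftAliases/rightAliases). Bare identifiers may be field
 * names of a DOT expression and are collected into fields when requested.
 */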
@SuppressWarnings("nls")
private void parseJoinCondPopulateAlias(QBJoinTree joinTree, ASTNode condn,
ArrayList<String> leftAliases, ArrayList<String> rightAliases,
ArrayList<String> fields) throws SemanticException {
// String[] allAliases = joinTree.getAllAliases();
switch (condn.getToken().getType()) {
case HiveParser.TOK_TABLE_OR_COL:
String tableOrCol = unescapeIdentifier(condn.getChild(0).getText()
.toLowerCase());
unparseTranslator.addIdentifierTranslation((ASTNode) condn.getChild(0));
if (isPresent(joinTree.getLeftAliases(), tableOrCol)) {
if (!leftAliases.contains(tableOrCol)) {
leftAliases.add(tableOrCol);
}
} else if (isPresent(joinTree.getRightAliases(), tableOrCol)) {
if (!rightAliases.contains(tableOrCol)) {
rightAliases.add(tableOrCol);
}
} else {
// We don't support columns without a table prefix in the JOIN condition
// right now. We would need metadata here to know which table the column
// belongs to.
throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(condn
.getChild(0)));
}
break;
case HiveParser.Identifier:
// it may be a field name, return the identifier and let the caller decide
// whether it is or not
if (fields != null) {
fields
.add(unescapeIdentifier(condn.getToken().getText().toLowerCase()));
}
unparseTranslator.addIdentifierTranslation(condn);
break;
case HiveParser.Number:
case HiveParser.StringLiteral:
case HiveParser.TOK_CHARSETLITERAL:
case HiveParser.KW_TRUE:
case HiveParser.KW_FALSE:
break;
case HiveParser.TOK_FUNCTION:
// check all the arguments
for (int i = 1; i < condn.getChildCount(); i++) {
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(i),
leftAliases, rightAliases, null);
}
break;
default:
// This is an operator - so check whether it is a unary or binary operator
if (condn.getChildCount() == 1) {
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
leftAliases, rightAliases, null);
} else if (condn.getChildCount() == 2) {
ArrayList<String> fields1 = null;
// if it is a dot operator, remember the field name of the rhs of the
// left semijoin
if (joinTree.getNoSemiJoin() == false
&& condn.getToken().getType() == HiveParser.DOT) {
// get the semijoin rhs table name and field name
fields1 = new ArrayList<String>();
int rhssize = rightAliases.size();
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
leftAliases, rightAliases, null);
String rhsAlias = null;
if (rightAliases.size() > rhssize) { // the new table is rhs table
rhsAlias = rightAliases.get(rightAliases.size() - 1);
}
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1),
leftAliases, rightAliases, fields1);
if (rhsAlias != null && fields1.size() > 0) {
joinTree.addRHSSemijoinColumns(rhsAlias, condn);
}
} else {
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
leftAliases, rightAliases, null);
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1),
leftAliases, rightAliases, fields1);
}
} else {
throw new SemanticException(condn.toStringTree() + " encountered with "
+ condn.getChildCount() + " children");
}
break;
}
}
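/**
 * Adds a resolved join-key expression to the left or the right expression
 * list of the join tree. A single condition side may reference aliases from
 * only one side of the join; otherwise an error is raised.
 */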
private void populateAliases(ArrayList<String> leftAliases,
ArrayList<String> rightAliases, ASTNode condn, QBJoinTree joinTree,
ArrayList<String> leftSrc) throws SemanticException {
if ((leftAliases.size() != 0) && (rightAliases.size() != 0)) {
throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1
.getMsg(condn));
}
if (rightAliases.size() != 0) {
assert rightAliases.size() == 1;
joinTree.getExpressions().get(1).add(condn);
} else if (leftAliases.size() != 0) {
joinTree.getExpressions().get(0).add(condn);
for (String s : leftAliases) {
if (!leftSrc.contains(s)) {
leftSrc.add(s);
}
}
} else {
throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_2
.getMsg(condn));
}
}
/**
* Parse the join condition. If the condition is a join condition, throw an
* error if it is not an equality. Otherwise, break it into left and right
* expressions and store them in the join tree. If the condition is a join
* filter, add it to the filter list of the join tree. The join condition can
* contain conditions on both the left and right trees and filters on either.
* Currently, we only support equi-joins, so we throw an error if the
* condition involves both subtrees and is not an equality. Also, we only
* support AND, i.e. ORs are not supported currently, as their semantics are
* not very clear, they may lead to data explosion, and there is no use case.
*
* @param joinTree
* jointree to be populated
* @param joinCond
* join condition
* @param leftSrc
* left sources
* @throws SemanticException
*/
private void parseJoinCondition(QBJoinTree joinTree, ASTNode joinCond,
ArrayList<String> leftSrc) throws SemanticException {
if (joinCond == null) {
return;
}
JoinType type = joinTree.getJoinCond()[0].getJoinType();
switch (joinCond.getToken().getType()) {
case HiveParser.KW_OR:
throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_3
.getMsg(joinCond));
case HiveParser.KW_AND:
parseJoinCondition(joinTree, (ASTNode) joinCond.getChild(0), leftSrc);
parseJoinCondition(joinTree, (ASTNode) joinCond.getChild(1), leftSrc);
break;
case HiveParser.EQUAL:
ASTNode leftCondn = (ASTNode) joinCond.getChild(0);
ArrayList<String> leftCondAl1 = new ArrayList<String>();
ArrayList<String> leftCondAl2 = new ArrayList<String>();
parseJoinCondPopulateAlias(joinTree, leftCondn, leftCondAl1, leftCondAl2,
null);
ASTNode rightCondn = (ASTNode) joinCond.getChild(1);
ArrayList<String> rightCondAl1 = new ArrayList<String>();
ArrayList<String> rightCondAl2 = new ArrayList<String>();
parseJoinCondPopulateAlias(joinTree, rightCondn, rightCondAl1,
rightCondAl2, null);
// is it a filter or a join condition
// if it is filter see if it can be pushed above the join
// filter cannot be pushed if
// * join is full outer or
// * join is left outer and filter is on left alias or
// * join is right outer and filter is on right alias
if (((leftCondAl1.size() != 0) && (leftCondAl2.size() != 0))
|| ((rightCondAl1.size() != 0) && (rightCondAl2.size() != 0))) {
throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1
.getMsg(joinCond));
}
if (leftCondAl1.size() != 0) {
if ((rightCondAl1.size() != 0)
|| ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) {
if (type.equals(JoinType.LEFTOUTER) ||
type.equals(JoinType.FULLOUTER)) {
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
joinTree.getFilters().get(0).add(joinCond);
} else {
LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS);
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else {
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else if (rightCondAl2.size() != 0) {
populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree,
leftSrc);
populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree,
leftSrc);
}
} else if (leftCondAl2.size() != 0) {
if ((rightCondAl2.size() != 0)
|| ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) {
if (type.equals(JoinType.RIGHTOUTER)
|| type.equals(JoinType.FULLOUTER)) {
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
joinTree.getFilters().get(1).add(joinCond);
} else {
LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS);
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
} else {
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
} else if (rightCondAl1.size() != 0) {
populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree,
leftSrc);
populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree,
leftSrc);
}
} else if (rightCondAl1.size() != 0) {
if (type.equals(JoinType.LEFTOUTER)
|| type.equals(JoinType.FULLOUTER)) {
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
joinTree.getFilters().get(0).add(joinCond);
} else {
LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS);
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else {
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else {
if (type.equals(JoinType.RIGHTOUTER)
|| type.equals(JoinType.FULLOUTER)) {
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
joinTree.getFilters().get(1).add(joinCond);
} else {
LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS);
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
} else {
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
}
break;
default:
boolean isFunction = (joinCond.getType() == HiveParser.TOK_FUNCTION);
// Create all children
int childrenBegin = (isFunction ? 1 : 0);
ArrayList<ArrayList<String>> leftAlias = new ArrayList<ArrayList<String>>(
joinCond.getChildCount() - childrenBegin);
ArrayList<ArrayList<String>> rightAlias = new ArrayList<ArrayList<String>>(
joinCond.getChildCount() - childrenBegin);
for (int ci = 0; ci < joinCond.getChildCount() - childrenBegin; ci++) {
ArrayList<String> left = new ArrayList<String>();
ArrayList<String> right = new ArrayList<String>();
leftAlias.add(left);
rightAlias.add(right);
}
for (int ci = childrenBegin; ci < joinCond.getChildCount(); ci++) {
parseJoinCondPopulateAlias(joinTree, (ASTNode) joinCond.getChild(ci),
leftAlias.get(ci - childrenBegin), rightAlias.get(ci
- childrenBegin), null);
}
boolean leftAliasNull = true;
for (ArrayList<String> left : leftAlias) {
if (left.size() != 0) {
leftAliasNull = false;
break;
}
}
boolean rightAliasNull = true;
for (ArrayList<String> right : rightAlias) {
if (right.size() != 0) {
rightAliasNull = false;
break;
}
}
if (!leftAliasNull && !rightAliasNull) {
throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1
.getMsg(joinCond));
}
if (!leftAliasNull) {
if (type.equals(JoinType.LEFTOUTER)
|| type.equals(JoinType.FULLOUTER)) {
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
joinTree.getFilters().get(0).add(joinCond);
} else {
LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS);
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else {
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else {
if (type.equals(JoinType.RIGHTOUTER)
|| type.equals(JoinType.FULLOUTER)) {
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
joinTree.getFilters().get(1).add(joinCond);
} else {
LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS);
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
} else {
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
}
break;
}
}
@SuppressWarnings("nls")
public <T extends Serializable> Operator<T> putOpInsertMap(Operator<T> op,
RowResolver rr) {
OpParseContext ctx = new OpParseContext(rr);
opParseCtx.put(op, ctx);
op.augmentPlan();
return op;
}
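/**
 * Generates the filter operator for a HAVING clause. Column aliases from the
 * select expressions are first exposed to the input row resolver so that the
 * having condition can reference them.
 */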
@SuppressWarnings("nls")
private Operator genHavingPlan(String dest, QB qb, Operator input)
throws SemanticException {
ASTNode havingExpr = qb.getParseInfo().getHavingForClause(dest);
OpParseContext inputCtx = opParseCtx.get(input);
RowResolver inputRR = inputCtx.getRowResolver();
Map<ASTNode, String> exprToColumnAlias = qb.getParseInfo().getAllExprToColumnAlias();
for (ASTNode astNode : exprToColumnAlias.keySet()) {
if (inputRR.getExpression(astNode) != null) {
inputRR.put("", exprToColumnAlias.get(astNode), inputRR.getExpression(astNode));
}
}
ASTNode condn = (ASTNode) havingExpr.getChild(0);
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
new FilterDesc(genExprNodeDesc(condn, inputRR), false), new RowSchema(
inputRR.getColumnInfos()), input), inputRR);
return output;
}
@SuppressWarnings("nls")
private Operator genFilterPlan(String dest, QB qb, Operator input)
throws SemanticException {
ASTNode whereExpr = qb.getParseInfo().getWhrForClause(dest);
return genFilterPlan(qb, (ASTNode) whereExpr.getChild(0), input);
}
/**
* create a filter plan. The condition and the inputs are specified.
*
* @param qb
* current query block
* @param condn
* The condition to be resolved
* @param input
* the input operator
*/
@SuppressWarnings("nls")
private Operator genFilterPlan(QB qb, ASTNode condn, Operator input)
throws SemanticException {
OpParseContext inputCtx = opParseCtx.get(input);
RowResolver inputRR = inputCtx.getRowResolver();
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
new FilterDesc(genExprNodeDesc(condn, inputRR), false), new RowSchema(
inputRR.getColumnInfos()), input), inputRR);
if (LOG.isDebugEnabled()) {
LOG.debug("Created Filter Plan for " + qb.getId() + " row schema: "
+ inputRR.toString());
}
return output;
}
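/**
 * Expands a column regex (e.g. "*" or "tab.*") against the input row
 * resolver: every matching, non-hidden column is appended to col_list and
 * registered in the output row resolver. Returns the next column position.
 */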
@SuppressWarnings("nls")
private Integer genColListRegex(String colRegex, String tabAlias,
ASTNode sel, ArrayList<ExprNodeDesc> col_list,
RowResolver input, Integer pos, RowResolver output, List<String> aliases)
throws SemanticException {
// The table alias should exist
if (tabAlias != null && !input.hasTableAlias(tabAlias)) {
throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(sel));
}
// TODO: Have to put in the support for AS clause
Pattern regex = null;
try {
regex = Pattern.compile(colRegex, Pattern.CASE_INSENSITIVE);
} catch (PatternSyntaxException e) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(sel, e
.getMessage()));
}
StringBuilder replacementText = new StringBuilder();
int matched = 0;
// Add the empty string to the list of aliases. Some operators (e.g. GroupBy) add
// ColumnInfos for table alias "".
if (!aliases.contains("")) {
aliases.add("");
}
// For expr "*", aliases should be iterated in the order they are specified
// in the query.
for (String alias : aliases) {
HashMap<String, ColumnInfo> fMap = input.getFieldMap(alias);
if (fMap == null) {
continue;
}
// For the tab.* case, add all the columns to the fieldList
// from the input schema
for (Map.Entry<String, ColumnInfo> entry : fMap.entrySet()) {
ColumnInfo colInfo = entry.getValue();
String name = colInfo.getInternalName();
String[] tmp = input.reverseLookup(name);
// Skip the colinfos which are not for this particular alias
if (tabAlias != null && !tmp[0].equalsIgnoreCase(tabAlias)) {
continue;
}
if (colInfo.getIsVirtualCol() && colInfo.isHiddenVirtualCol()) {
continue;
}
// Not matching the regex?
if (!regex.matcher(tmp[1]).matches()) {
continue;
}
ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(),
name, colInfo.getTabAlias(), colInfo.getIsVirtualCol());
col_list.add(expr);
output.put(tmp[0], tmp[1],
new ColumnInfo(getColumnInternalName(pos), colInfo.getType(),
colInfo.getTabAlias(), colInfo.getIsVirtualCol(),
colInfo.isHiddenVirtualCol()));
pos = Integer.valueOf(pos.intValue() + 1);
matched++;
if (unparseTranslator.isEnabled()) {
if (replacementText.length() > 0) {
replacementText.append(", ");
}
replacementText.append(HiveUtils.unparseIdentifier(tmp[0]));
replacementText.append(".");
replacementText.append(HiveUtils.unparseIdentifier(tmp[1]));
}
}
}
if (matched == 0) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(sel));
}
if (unparseTranslator.isEnabled()) {
unparseTranslator.addTranslation(sel, replacementText.toString());
}
return pos;
}
public static String getColumnInternalName(int pos) {
return HiveConf.getColumnInternalName(pos);
}
private String getScriptProgName(String cmd) {
int end = cmd.indexOf(" ");
return (end == -1) ? cmd : cmd.substring(0, end);
}
private String getScriptArgs(String cmd) {
int end = cmd.indexOf(" ");
return (end == -1) ? "" : cmd.substring(end, cmd.length());
}
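/**
 * If the script command refers to a program via a remote scheme (as matched
 * by SessionState.getMatchingSchemaAsRegex()), register it as a session FILE
 * resource so that it is shipped to the cluster, and rewrite the command to
 * use the downloaded file's name; otherwise return the command unchanged.
 */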
private String fetchFilesNotInLocalFilesystem(String cmd) {
SessionState ss = SessionState.get();
String progName = getScriptProgName(cmd);
if (progName.matches("("+ SessionState.getMatchingSchemaAsRegex() +")://.*")) {
String filePath = ss.add_resource(ResourceType.FILE, progName, true);
if (filePath == null) {
throw new RuntimeException("Could not download the resource: " + progName);
}
Path p = new Path(filePath);
String fileName = p.getName();
String scriptArgs = getScriptArgs(cmd);
String finalCmd = fileName + scriptArgs;
return finalCmd;
}
return cmd;
}
private TableDesc getTableDescFromSerDe(ASTNode child, String cols,
String colTypes, boolean defaultCols) throws SemanticException {
if (child.getType() == HiveParser.TOK_SERDENAME) {
String serdeName = unescapeSQLString(child.getChild(0).getText());
Class<? extends Deserializer> serdeClass = null;
try {
serdeClass = (Class<? extends Deserializer>) Class.forName(serdeName,
true, JavaUtils.getClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
TableDesc tblDesc = PlanUtils.getTableDesc(serdeClass, Integer
.toString(Utilities.tabCode), cols, colTypes, defaultCols);
// copy all the properties
if (child.getChildCount() == 2) {
ASTNode prop = (ASTNode) ((ASTNode) child.getChild(1)).getChild(0);
for (int propChild = 0; propChild < prop.getChildCount(); propChild++) {
String key = unescapeSQLString(prop.getChild(propChild).getChild(0)
.getText());
String value = unescapeSQLString(prop.getChild(propChild).getChild(1)
.getText());
tblDesc.getProperties().setProperty(key, value);
}
}
return tblDesc;
} else if (child.getType() == HiveParser.TOK_SERDEPROPS) {
TableDesc tblDesc = PlanUtils.getDefaultTableDesc(Integer
.toString(Utilities.ctrlaCode), cols, colTypes, defaultCols);
int numChildRowFormat = child.getChildCount();
for (int numC = 0; numC < numChildRowFormat; numC++) {
ASTNode rowChild = (ASTNode) child.getChild(numC);
switch (rowChild.getToken().getType()) {
case HiveParser.TOK_TABLEROWFORMATFIELD:
String fieldDelim = unescapeSQLString(rowChild.getChild(0).getText());
tblDesc.getProperties()
.setProperty(Constants.FIELD_DELIM, fieldDelim);
tblDesc.getProperties().setProperty(Constants.SERIALIZATION_FORMAT,
fieldDelim);
if (rowChild.getChildCount() >= 2) {
String fieldEscape = unescapeSQLString(rowChild.getChild(1)
.getText());
tblDesc.getProperties().setProperty(Constants.ESCAPE_CHAR,
fieldEscape);
}
break;
case HiveParser.TOK_TABLEROWFORMATCOLLITEMS:
tblDesc.getProperties().setProperty(Constants.COLLECTION_DELIM,
unescapeSQLString(rowChild.getChild(0).getText()));
break;
case HiveParser.TOK_TABLEROWFORMATMAPKEYS:
tblDesc.getProperties().setProperty(Constants.MAPKEY_DELIM,
unescapeSQLString(rowChild.getChild(0).getText()));
break;
case HiveParser.TOK_TABLEROWFORMATLINES:
String lineDelim = unescapeSQLString(rowChild.getChild(0).getText());
tblDesc.getProperties().setProperty(Constants.LINE_DELIM, lineDelim);
if (!lineDelim.equals("\n") && !lineDelim.equals("10")) {
throw new SemanticException(
ErrorMsg.LINES_TERMINATED_BY_NON_NEWLINE.getMsg());
}
break;
default:
assert false;
}
}
return tblDesc;
}
// should never come here
return null;
}
private void failIfColAliasExists(Set<String> nameSet, String name)
throws SemanticException {
if (nameSet.contains(name)) {
throw new SemanticException(ErrorMsg.COLUMN_ALIAS_ALREADY_EXISTS
.getMsg(name));
}
nameSet.add(name);
}
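/**
 * Builds the plan for a TRANSFORM clause. The fixed child positions of the
 * transform AST used below are: 1 = input SerDe, 2 = input record writer,
 * 3 = script command, 4 = output SerDe, 5 = output record reader and
 * 6 = output column list (the AS clause), matching the local index constants.
 */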
@SuppressWarnings("nls")
private Operator genScriptPlan(ASTNode trfm, QB qb, Operator input)
throws SemanticException {
// If there is no "AS" clause, the output schema will be "key,value"
ArrayList<ColumnInfo> outputCols = new ArrayList<ColumnInfo>();
int inputSerDeNum = 1, inputRecordWriterNum = 2;
int outputSerDeNum = 4, outputRecordReaderNum = 5;
int outputColsNum = 6;
boolean outputColNames = false, outputColSchemas = false;
int execPos = 3;
boolean defaultOutputCols = false;
// Go over all the children
if (trfm.getChildCount() > outputColsNum) {
ASTNode outCols = (ASTNode) trfm.getChild(outputColsNum);
if (outCols.getType() == HiveParser.TOK_ALIASLIST) {
outputColNames = true;
} else if (outCols.getType() == HiveParser.TOK_TABCOLLIST) {
outputColSchemas = true;
}
}
// If column type is not specified, use a string
if (!outputColNames && !outputColSchemas) {
String intName = getColumnInternalName(0);
ColumnInfo colInfo = new ColumnInfo(intName,
TypeInfoFactory.stringTypeInfo, null, false);
colInfo.setAlias("key");
outputCols.add(colInfo);
intName = getColumnInternalName(1);
colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null,
false);
colInfo.setAlias("value");
outputCols.add(colInfo);
defaultOutputCols = true;
} else {
ASTNode collist = (ASTNode) trfm.getChild(outputColsNum);
int ccount = collist.getChildCount();
Set<String> colAliasNamesDuplicateCheck = new HashSet<String>();
if (outputColNames) {
for (int i = 0; i < ccount; ++i) {
String colAlias = unescapeIdentifier(((ASTNode) collist.getChild(i))
.getText());
failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias);
String intName = getColumnInternalName(i);
ColumnInfo colInfo = new ColumnInfo(intName,
TypeInfoFactory.stringTypeInfo, null, false);
colInfo.setAlias(colAlias);
outputCols.add(colInfo);
}
} else {
for (int i = 0; i < ccount; ++i) {
ASTNode child = (ASTNode) collist.getChild(i);
assert child.getType() == HiveParser.TOK_TABCOL;
String colAlias = unescapeIdentifier(((ASTNode) child.getChild(0))
.getText());
failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias);
String intName = getColumnInternalName(i);
ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoUtils
.getTypeInfoFromTypeString(getTypeStringFromAST((ASTNode) child
.getChild(1))), null, false);
colInfo.setAlias(colAlias);
outputCols.add(colInfo);
}
}
}
RowResolver out_rwsch = new RowResolver();
StringBuilder columns = new StringBuilder();
StringBuilder columnTypes = new StringBuilder();
for (int i = 0; i < outputCols.size(); ++i) {
if (i != 0) {
columns.append(",");
columnTypes.append(",");
}
columns.append(outputCols.get(i).getInternalName());
columnTypes.append(outputCols.get(i).getType().getTypeName());
out_rwsch.put(qb.getParseInfo().getAlias(), outputCols.get(i).getAlias(),
outputCols.get(i));
}
StringBuilder inpColumns = new StringBuilder();
StringBuilder inpColumnTypes = new StringBuilder();
ArrayList<ColumnInfo> inputSchema = opParseCtx.get(input).getRowResolver()
.getColumnInfos();
for (int i = 0; i < inputSchema.size(); ++i) {
if (i != 0) {
inpColumns.append(",");
inpColumnTypes.append(",");
}
inpColumns.append(inputSchema.get(i).getInternalName());
inpColumnTypes.append(inputSchema.get(i).getType().getTypeName());
}
TableDesc outInfo;
TableDesc errInfo;
TableDesc inInfo;
String defaultSerdeName = conf.getVar(HiveConf.ConfVars.HIVESCRIPTSERDE);
Class<? extends Deserializer> serde;
try {
serde = (Class<? extends Deserializer>) Class.forName(defaultSerdeName,
true, JavaUtils.getClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
// Input and Output Serdes
if (trfm.getChild(inputSerDeNum).getChildCount() > 0) {
inInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm
.getChild(inputSerDeNum))).getChild(0), inpColumns.toString(),
inpColumnTypes.toString(), false);
} else {
inInfo = PlanUtils.getTableDesc(serde, Integer
.toString(Utilities.tabCode), inpColumns.toString(), inpColumnTypes
.toString(), false, true);
}
if (trfm.getChild(outputSerDeNum).getChildCount() > 0) {
outInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm
.getChild(outputSerDeNum))).getChild(0), columns.toString(),
columnTypes.toString(), false);
// This is for backward compatibility. If the user did not specify the
// output column list, we assume that there are 2 columns: key and value.
// However, if the script outputs: col1, col2, col3 separated by TAB, the
// requirement is: key is col1 and value is (col2 TAB col3)
} else {
outInfo = PlanUtils.getTableDesc(serde, Integer
.toString(Utilities.tabCode), columns.toString(), columnTypes
.toString(), defaultOutputCols);
}
// Error stream always uses the default serde with a single column
errInfo = PlanUtils.getTableDesc(serde, Integer.toString(Utilities.tabCode), "KEY");
// Output record readers
Class<? extends RecordReader> outRecordReader = getRecordReader((ASTNode) trfm
.getChild(outputRecordReaderNum));
Class<? extends RecordWriter> inRecordWriter = getRecordWriter((ASTNode) trfm
.getChild(inputRecordWriterNum));
Class<? extends RecordReader> errRecordReader = getDefaultRecordReader();
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
new ScriptDesc(
fetchFilesNotInLocalFilesystem(stripQuotes(trfm.getChild(execPos).getText())),
inInfo, inRecordWriter, outInfo, outRecordReader, errRecordReader, errInfo),
new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
return output;
}
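/**
* Resolves the RecordReader class used to read the script's output: the class
* named in the given AST node if one is specified, otherwise the configured
* default (HIVESCRIPTRECORDREADER).
*/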
private Class<? extends RecordReader> getRecordReader(ASTNode node)
throws SemanticException {
String name;
if (node.getChildCount() == 0) {
name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDREADER);
} else {
name = unescapeSQLString(node.getChild(0).getText());
}
try {
return (Class<? extends RecordReader>) Class.forName(name, true,
JavaUtils.getClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
}
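/**
* Returns the configured default RecordReader (HIVESCRIPTRECORDREADER); used
* above for the script's error stream.
*/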
private Class<? extends RecordReader> getDefaultRecordReader()
throws SemanticException {
String name;
name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDREADER);
try {
return (Class<? extends RecordReader>) Class.forName(name, true,
JavaUtils.getClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
}
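/**
* Resolves the RecordWriter class used to feed rows to the script: the class
* named in the given AST node if one is specified, otherwise the configured
* default (HIVESCRIPTRECORDWRITER).
*/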
private Class<? extends RecordWriter> getRecordWriter(ASTNode node)
throws SemanticException {
String name;
if (node.getChildCount() == 0) {
name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDWRITER);
} else {
name = unescapeSQLString(node.getChild(0).getText());
}
try {
return (Class<? extends RecordWriter>) Class.forName(name, true,
JavaUtils.getClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
}
/**
* This function is a wrapper of parseInfo.getGroupByForClause which
* automatically translates SELECT DISTINCT a,b,c to SELECT a,b,c GROUP BY
* a,b,c.
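* For a SELECT DISTINCT query the select expressions themselves are returned
* as the group-by expressions, even though the query has no GROUP BY clause.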
*/
static List<ASTNode> getGroupByForClause(QBParseInfo parseInfo, String dest) {
if (parseInfo.getSelForClause(dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
ASTNode selectExprs = parseInfo.getSelForClause(dest);
List<ASTNode> result = new ArrayList<ASTNode>(selectExprs == null ? 0
: selectExprs.getChildCount());
if (selectExprs != null) {
for (int i = 0; i < selectExprs.getChildCount(); ++i) {
if (((ASTNode) selectExprs.getChild(i)).getToken().getType() == HiveParser.TOK_HINTLIST) {
continue;
}
// table.column AS alias
ASTNode grpbyExpr = (ASTNode) selectExprs.getChild(i).getChild(0);
result.add(grpbyExpr);
}
}
return result;
} else {
ASTNode grpByExprs = parseInfo.getGroupByForClause(dest);
List<ASTNode> result = new ArrayList<ASTNode>(grpByExprs == null ? 0
: grpByExprs.getChildCount());
if (grpByExprs != null) {
for (int i = 0; i < grpByExprs.getChildCount(); ++i) {
ASTNode grpbyExpr = (ASTNode) grpByExprs.getChild(i);
result.add(grpbyExpr);
}
}
return result;
}
}
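/**
* Extracts the table alias and column alias for a select expression and
* returns them as {tabAlias, colAlias}. If no column alias can be derived
* from the expression, defaultName is used instead.
*/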
private static String[] getColAlias(ASTNode selExpr, String defaultName,
RowResolver inputRR) {
String colAlias = null;
String tabAlias = null;
String[] colRef = new String[2];
if (selExpr.getChildCount() == 2) {
// return zz for "xx + yy AS zz"
colAlias = unescapeIdentifier(selExpr.getChild(1).getText());
colRef[0] = tabAlias;
colRef[1] = colAlias;
return colRef;
}
ASTNode root = (ASTNode) selExpr.getChild(0);
if (root.getType() == HiveParser.TOK_TABLE_OR_COL) {
colAlias = root.getChild(0).getText();
colRef[0] = tabAlias;
colRef[1] = colAlias;
return colRef;
}
if (root.getType() == HiveParser.DOT) {
ASTNode tab = (ASTNode) root.getChild(0);
if (tab.getType() == HiveParser.TOK_TABLE_OR_COL) {
String t = unescapeIdentifier(tab.getChild(0).getText());
if (inputRR.hasTableAlias(t)) {
tabAlias = t;
}
}
// Return zz for "xx.zz" and "xx.yy.zz"
ASTNode col = (ASTNode) root.getChild(1);
if (col.getType() == HiveParser.Identifier) {
colAlias = unescapeIdentifier(col.getText());
}
}
if (colAlias == null) {
// Return defaultName if selExpr is not a simple xx.yy.zz
colAlias = defaultName;
}
colRef[0] = tabAlias;
colRef[1] = colAlias;
return colRef;
}
/**
* Returns whether the pattern is a regex expression (instead of a normal
* string). A normal string contains only letters, digits, and "_".
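* For example, "col_1" is not treated as a regex, while "key.*" is, since it
* contains a character other than a letter, digit or "_".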
*/
private static boolean isRegex(String pattern) {
for (int i = 0; i < pattern.length(); i++) {
if (!Character.isLetterOrDigit(pattern.charAt(i))
&& pattern.charAt(i) != '_') {
return true;
}
}
return false;
}
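/**
* Generates the select plan for the given destination clause of the query
* block by delegating to genSelectPlan(ASTNode, QB, Operator).
*/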
private Operator<?> genSelectPlan(String dest, QB qb, Operator<?> input)
throws SemanticException {
ASTNode selExprList = qb.getParseInfo().getSelForClause(dest);
Operator<?> op = genSelectPlan(selExprList, qb, input);
if (LOG.isDebugEnabled()) {
LOG.debug("Created Select Plan for clause: " + dest);
}
return op;
}
@SuppressWarnings("nls")
private Operator<?> genSelectPlan(ASTNode selExprList, QB qb,
Operator<?> input) throws SemanticException {
if (LOG.isDebugEnabled()) {
LOG.debug("tree: " + selExprList.toStringTree());
}
ArrayList<ExprNodeDesc> col_list = new ArrayList<ExprNodeDesc>();
RowResolver out_rwsch = new RowResolver();
ASTNode trfm = null;
String alias = qb.getParseInfo().getAlias();
Integer pos = Integer.valueOf(0);
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
// SELECT * or SELECT TRANSFORM(*)
boolean selectStar = false;
int posn = 0;
boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.TOK_HINTLIST);
if (hintPresent) {
posn++;
}
boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() ==
HiveParser.TOK_TRANSFORM);
if (isInTransform) {
trfm = (ASTNode) selExprList.getChild(posn).getChild(0);
}
// Detect queries of the form SELECT udtf(col) AS ...
// by looking for a function as the first child, and then checking to see
// if the function is a Generic UDTF. It's not as clean as TRANSFORM due to
// the lack of a special token.
boolean isUDTF = false;
String udtfTableAlias = null;
ArrayList<String> udtfColAliases = new ArrayList<String>();
ASTNode udtfExpr = (ASTNode) selExprList.getChild(posn).getChild(0);
GenericUDTF genericUDTF = null;
int udtfExprType = udtfExpr.getType();
if (udtfExprType == HiveParser.TOK_FUNCTION
|| udtfExprType == HiveParser.TOK_FUNCTIONSTAR) {
String funcName = TypeCheckProcFactory.DefaultExprProcessor
.getFunctionText(udtfExpr, true);
FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName);
if (fi != null) {
genericUDTF = fi.getGenericUDTF();
}
isUDTF = (genericUDTF != null);
if (isUDTF && !fi.isNative()) {
unparseTranslator.addIdentifierTranslation((ASTNode) udtfExpr
.getChild(0));
}
}
if (isUDTF) {
// Only support a single expression when it's a UDTF
if (selExprList.getChildCount() > 1) {
throw new SemanticException(ErrorMsg.UDTF_MULTIPLE_EXPR.getMsg());
}
// Require an AS for UDTFs for column aliases
ASTNode selExpr = (ASTNode) selExprList.getChild(posn);
if (selExpr.getChildCount() < 2) {
throw new SemanticException(ErrorMsg.UDTF_REQUIRE_AS.getMsg());
}
// Get the column / table aliases from the expression. Start from 1 as
// 0 is the TOK_FUNCTION
for (int i = 1; i < selExpr.getChildCount(); i++) {
ASTNode selExprChild = (ASTNode) selExpr.getChild(i);
switch (selExprChild.getType()) {
case HiveParser.Identifier:
udtfColAliases.add(unescapeIdentifier(selExprChild.getText()));
unparseTranslator.addIdentifierTranslation(selExprChild);
break;
case HiveParser.TOK_TABALIAS:
assert (selExprChild.getChildCount() == 1);
udtfTableAlias = unescapeIdentifier(selExprChild.getChild(0)
.getText());
qb.addAlias(udtfTableAlias);
unparseTranslator.addIdentifierTranslation((ASTNode) selExprChild
.getChild(0));
break;
default:
assert (false);
}
}
if (LOG.isDebugEnabled()) {
LOG.debug("UDTF table alias is " + udtfTableAlias);
LOG.debug("UDTF col aliases are " + udtfColAliases);
}
}
// The list of expressions after SELECT or SELECT TRANSFORM.
ASTNode exprList;
if (isInTransform) {
exprList = (ASTNode) trfm.getChild(0);
} else if (isUDTF) {
exprList = udtfExpr;
} else {
exprList = selExprList;
}
if (LOG.isDebugEnabled()) {
LOG.debug("genSelectPlan: input = " + inputRR.toString());
}
// For UDTF's, skip the function name to get the expressions
int startPosn = isUDTF ? posn + 1 : posn;
if (isInTransform) {
startPosn = 0;
}
// Iterate over all expressions (either after SELECT, or in SELECT TRANSFORM).
for (int i = startPosn; i < exprList.getChildCount(); ++i) {
// child can be EXPR AS ALIAS, or EXPR.
ASTNode child = (ASTNode) exprList.getChild(i);
boolean hasAsClause = (!isInTransform) && (child.getChildCount() == 2);
// EXPR AS (ALIAS,...) parses, but is only allowed for UDTFs.
// This check is not needed (and is invalid) when there is a transform,
// because the ASTs are slightly different.
if (!isInTransform && !isUDTF && child.getChildCount() > 2) {
throw new SemanticException(ErrorMsg.INVALID_AS.getMsg());
}
// The real expression
ASTNode expr;
String tabAlias;
String colAlias;
if (isInTransform || isUDTF) {
tabAlias = null;
colAlias = "_C" + i;
expr = child;
} else {
String[] colRef = getColAlias(child, "_C" + i, inputRR);
tabAlias = colRef[0];
colAlias = colRef[1];
if (hasAsClause) {
unparseTranslator.addIdentifierTranslation((ASTNode) child
.getChild(1));
}
// Get rid of TOK_SELEXPR
expr = (ASTNode) child.getChild(0);
}
if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
pos = genColListRegex(".*", expr.getChildCount() == 0 ? null
: unescapeIdentifier(expr.getChild(0).getText().toLowerCase()),
expr, col_list, inputRR, pos, out_rwsch, qb.getAliases());
selectStar = true;
} else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause
&& !inputRR.getIsExprResolver()
&& isRegex(unescapeIdentifier(expr.getChild(0).getText()))) {
// In case the expression is a regex COL.
// This can only happen without AS clause
// We don't allow this for ExprResolver - the Group By case
pos = genColListRegex(unescapeIdentifier(expr.getChild(0).getText()),
null, expr, col_list, inputRR, pos, out_rwsch, qb.getAliases());
} else if (expr.getType() == HiveParser.DOT
&& expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
&& inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0)
.getChild(0).getText().toLowerCase())) && !hasAsClause
&& !inputRR.getIsExprResolver()
&& isRegex(unescapeIdentifier(expr.getChild(1).getText()))) {
// In case the expression is TABLE.COL (col can be regex).
// This can only happen without AS clause
// We don't allow this for ExprResolver - the Group By case
pos = genColListRegex(unescapeIdentifier(expr.getChild(1).getText()),
unescapeIdentifier(expr.getChild(0).getChild(0).getText()
.toLowerCase()), expr, col_list, inputRR, pos, out_rwsch,
qb.getAliases());
} else {
// Case when this is an expression
ExprNodeDesc exp = genExprNodeDesc(expr, inputRR);
col_list.add(exp);
if (!StringUtils.isEmpty(alias)
&& (out_rwsch.get(null, colAlias) != null)) {
throw new SemanticException(ErrorMsg.AMBIGUOUS_COLUMN.getMsg(colAlias));
}
out_rwsch.put(tabAlias, colAlias, new ColumnInfo(
getColumnInternalName(pos), exp.getTypeInfo(), tabAlias, false));
pos = Integer.valueOf(pos.intValue() + 1);
}
}
selectStar = selectStar && exprList.getChildCount() == posn + 1;
ArrayList<String> columnNames = new ArrayList<String>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
for (int i = 0; i < col_list.size(); i++) {
// Replace NULL with CAST(NULL AS STRING)
if (col_list.get(i) instanceof ExprNodeNullDesc) {
col_list.set(i, new ExprNodeConstantDesc(
TypeInfoFactory.stringTypeInfo, null));
}
String outputCol = getColumnInternalName(i);
colExprMap.put(outputCol, col_list.get(i));
columnNames.add(outputCol);
}
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
new SelectDesc(col_list, columnNames, selectStar), new RowSchema(
out_rwsch.getColumnInfos()), input), out_rwsch);
output.setColumnExprMap(colExprMap);
if (isInTransform) {
output = genScriptPlan(trfm, qb, output);
}
if (isUDTF) {
output = genUDTFPlan(genericUDTF, udtfTableAlias, udtfColAliases, qb,
output);
}
if (LOG.isDebugEnabled()) {
LOG.debug("Created Select Plan row schema: " + out_rwsch.toString());
}
return output;
}
/**
* Class to store GenericUDAF related information.
*/
static class GenericUDAFInfo {
ArrayList<ExprNodeDesc> convertedParameters;
GenericUDAFEvaluator genericUDAFEvaluator;
TypeInfo returnType;
}
/**
* Convert exprNodeDesc array to Typeinfo array.
*/
static ArrayList<TypeInfo> getTypeInfo(ArrayList<ExprNodeDesc> exprs) {
ArrayList<TypeInfo> result = new ArrayList<TypeInfo>();
for (ExprNodeDesc expr : exprs) {
result.add(expr.getTypeInfo());
}
return result;
}
/**
* Convert a TypeInfo list to an array of standard writable ObjectInspectors.
*/
static ObjectInspector[] getStandardObjectInspector(ArrayList<TypeInfo> exprs) {
ObjectInspector[] result = new ObjectInspector[exprs.size()];
for (int i = 0; i < exprs.size(); i++) {
result[i] = TypeInfoUtils
.getStandardWritableObjectInspectorFromTypeInfo(exprs.get(i));
}
return result;
}
/**
* Returns the GenericUDAFEvaluator for the aggregation. This is called once
* for each GroupBy aggregation.
*/
static GenericUDAFEvaluator getGenericUDAFEvaluator(String aggName,
ArrayList<ExprNodeDesc> aggParameters, ASTNode aggTree,
boolean isDistinct, boolean isAllColumns)
throws SemanticException {
ArrayList<TypeInfo> originalParameterTypeInfos = getTypeInfo(aggParameters);
GenericUDAFEvaluator result = FunctionRegistry.getGenericUDAFEvaluator(
aggName, originalParameterTypeInfos, isDistinct, isAllColumns);
if (null == result) {
String reason = "Looking for UDAF Evaluator \"" + aggName
+ "\" with parameters " + originalParameterTypeInfos;
throw new SemanticException(ErrorMsg.INVALID_FUNCTION_SIGNATURE.getMsg(
(ASTNode) aggTree.getChild(0), reason));
}
return result;
}
/**
* Returns the GenericUDAFInfo struct for the aggregation.
*
* @param evaluator
* The GenericUDAFEvaluator for the aggregation.
* @param emode
* The mode of the GenericUDAFEvaluator.
* @param aggParameters
* The exprNodeDescs of the original parameters.
* @return GenericUDAFInfo
* @throws SemanticException
* when the UDAF is not found or has problems.
*/
static GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator,
GenericUDAFEvaluator.Mode emode, ArrayList<ExprNodeDesc> aggParameters)
throws SemanticException {
GenericUDAFInfo r = new GenericUDAFInfo();
// set r.genericUDAFEvaluator
r.genericUDAFEvaluator = evaluator;
// set r.returnType
ObjectInspector returnOI = null;
try {
ObjectInspector[] aggObjectInspectors =
getStandardObjectInspector(getTypeInfo(aggParameters));
returnOI = r.genericUDAFEvaluator.init(emode, aggObjectInspectors);
r.returnType = TypeInfoUtils.getTypeInfoFromObjectInspector(returnOI);
} catch (HiveException e) {
throw new SemanticException(e);
}
// set r.convertedParameters
// TODO: type conversion
r.convertedParameters = aggParameters;
return r;
}
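/**
* Maps the plan-level GroupByDesc.Mode to the corresponding
* GenericUDAFEvaluator.Mode. For distinct aggregations, PARTIALS maps to
* PARTIAL1 and MERGEPARTIAL maps to COMPLETE, since partial aggregation is
* never performed for distincts on the map side.
*/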
private static GenericUDAFEvaluator.Mode groupByDescModeToUDAFMode(
GroupByDesc.Mode mode, boolean isDistinct) {
switch (mode) {
case COMPLETE:
return GenericUDAFEvaluator.Mode.COMPLETE;
case PARTIAL1:
return GenericUDAFEvaluator.Mode.PARTIAL1;
case PARTIAL2:
return GenericUDAFEvaluator.Mode.PARTIAL2;
case PARTIALS:
return isDistinct ? GenericUDAFEvaluator.Mode.PARTIAL1
: GenericUDAFEvaluator.Mode.PARTIAL2;
case FINAL:
return GenericUDAFEvaluator.Mode.FINAL;
case HASH:
return GenericUDAFEvaluator.Mode.PARTIAL1;
case MERGEPARTIAL:
return isDistinct ? GenericUDAFEvaluator.Mode.COMPLETE
: GenericUDAFEvaluator.Mode.FINAL;
default:
throw new RuntimeException("internal error in groupByDescModeToUDAFMode");
}
}
/**
* Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)).
* The new GroupByOperator will be a child of the reduceSinkOperatorInfo.
*
* @param mode
* The mode of the aggregation (PARTIAL1 or COMPLETE)
* @param genericUDAFEvaluators
* If not null, this function will store the mapping from Aggregation
* StringTree to the genericUDAFEvaluator in this parameter, so it
* can be used in the next-stage GroupBy aggregations.
* @return the new GroupByOperator
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator(QBParseInfo parseInfo,
String dest, Operator reduceSinkOperatorInfo, GroupByDesc.Mode mode,
Map<String, GenericUDAFEvaluator> genericUDAFEvaluators)
throws SemanticException {
RowResolver groupByInputRowResolver = opParseCtx
.get(reduceSinkOperatorInfo).getRowResolver();
RowResolver groupByOutputRowResolver = new RowResolver();
groupByOutputRowResolver.setIsExprResolver(true);
ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
ArrayList<String> outputColumnNames = new ArrayList<String>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
ColumnInfo exprInfo = groupByInputRowResolver.getExpression(grpbyExpr);
if (exprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
}
groupByKeys.add(new ExprNodeColumnDesc(exprInfo.getType(), exprInfo
.getInternalName(), "", false));
String field = getColumnInternalName(i);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(grpbyExpr,
new ColumnInfo(field, exprInfo.getType(), null, false));
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
// For each aggregation
HashMap<String, ASTNode> aggregationTrees = parseInfo
.getAggregationExprsForClause(dest);
assert (aggregationTrees != null);
// get the last colName for the reduce KEY
// it represents the column name corresponding to distinct aggr, if any
String lastKeyColName = null;
if (reduceSinkOperatorInfo.getConf() instanceof ReduceSinkDesc) {
List<String> inputKeyCols = ((ReduceSinkDesc)
reduceSinkOperatorInfo.getConf()).getOutputKeyColumnNames();
if (inputKeyCols.size() > 0) {
lastKeyColName = inputKeyCols.get(inputKeyCols.size()-1);
}
}
int numDistinctUDFs = 0;
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
// This is the GenericUDAF name
String aggName = value.getChild(0).getText();
boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
// Convert children to aggParameters
ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
// 0 is the function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode paraExpr = (ASTNode) value.getChild(i);
ColumnInfo paraExprInfo =
groupByInputRowResolver.getExpression(paraExpr);
if (paraExprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(paraExpr));
}
String paraExpression = paraExprInfo.getInternalName();
assert (paraExpression != null);
if (isDistinct && lastKeyColName != null) {
// if the aggregation is distinct, the parameter name is constructed as
// KEY.lastKeyColName:<tag>._colx
paraExpression = Utilities.ReduceField.KEY.name() + "." +
lastKeyColName + ":" + numDistinctUDFs + "." +
getColumnInternalName(i-1);
}
aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(),
paraExpression, paraExprInfo.getTabAlias(),
paraExprInfo.getIsVirtualCol()));
}
if (isDistinct) {
numDistinctUDFs++;
}
Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(
aggName, aggParameters, value, isDistinct, isAllColumns);
assert (genericUDAFEvaluator != null);
GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode,
aggParameters);
aggregations.add(new AggregationDesc(aggName.toLowerCase(),
udaf.genericUDAFEvaluator, udaf.convertedParameters, isDistinct,
amode));
String field = getColumnInternalName(groupByKeys.size()
+ aggregations.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(value, new ColumnInfo(
field, udaf.returnType, "", false));
// Save the evaluator so that it can be used by the next-stage
// GroupByOperators
if (genericUDAFEvaluators != null) {
genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator);
}
}
float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
false,groupByMemoryUsage,memoryThreshold), new RowSchema(groupByOutputRowResolver.getColumnInfos()),
reduceSinkOperatorInfo), groupByOutputRowResolver);
op.setColumnExprMap(colExprMap);
return op;
}
/**
* Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)).
* The new GroupByOperator will be a child of the reduceSinkOperatorInfo.
*
* @param mode
* The mode of the aggregation (MERGEPARTIAL, PARTIAL2)
* @param genericUDAFEvaluators
* The mapping from Aggregation StringTree to the
* genericUDAFEvaluator.
* @param distPartAgg
* partial aggregation for distincts
* @return the new GroupByOperator
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator1(QBParseInfo parseInfo,
String dest, Operator reduceSinkOperatorInfo, GroupByDesc.Mode mode,
Map<String, GenericUDAFEvaluator> genericUDAFEvaluators,
boolean distPartAgg) throws SemanticException {
ArrayList<String> outputColumnNames = new ArrayList<String>();
RowResolver groupByInputRowResolver = opParseCtx
.get(reduceSinkOperatorInfo).getRowResolver();
RowResolver groupByOutputRowResolver = new RowResolver();
groupByOutputRowResolver.setIsExprResolver(true);
ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
ColumnInfo exprInfo = groupByInputRowResolver.getExpression(grpbyExpr);
if (exprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
}
groupByKeys.add(new ExprNodeColumnDesc(exprInfo.getType(), exprInfo
.getInternalName(), exprInfo.getTabAlias(), exprInfo
.getIsVirtualCol()));
String field = getColumnInternalName(i);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(grpbyExpr,
new ColumnInfo(field, exprInfo.getType(), "", false));
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
HashMap<String, ASTNode> aggregationTrees = parseInfo
.getAggregationExprsForClause(dest);
// get the last colName for the reduce KEY
// it represents the column name corresponding to distinct aggr, if any
String lastKeyColName = null;
if (reduceSinkOperatorInfo.getConf() instanceof ReduceSinkDesc) {
List<String> inputKeyCols = ((ReduceSinkDesc)
reduceSinkOperatorInfo.getConf()).getOutputKeyColumnNames();
if (inputKeyCols.size() > 0) {
lastKeyColName = inputKeyCols.get(inputKeyCols.size()-1);
}
}
int numDistinctUDFs = 0;
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
String aggName = value.getChild(0).getText();
ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
boolean isDistinct = (value.getType() == HiveParser.TOK_FUNCTIONDI);
// If the function is distinct, partial aggregation has not been done on
// the client side.
// If distPartAgg is set, the client is letting us know that partial
// aggregation has not been done.
// For example: select a, count(b+c), count(distinct d+e) group by a
// For count(b+c), if partial aggregation has been performed, then we
// directly look for count(b+c).
// Otherwise, we look for b+c.
// For distincts, partial aggregation is never performed on the client
// side, so always look for the parameters: d+e
boolean partialAggDone = !(distPartAgg || isDistinct);
if (!partialAggDone) {
// 0 is the function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode paraExpr = (ASTNode) value.getChild(i);
ColumnInfo paraExprInfo =
groupByInputRowResolver.getExpression(paraExpr);
if (paraExprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN
.getMsg(paraExpr));
}
String paraExpression = paraExprInfo.getInternalName();
assert (paraExpression != null);
if (isDistinct && lastKeyColName != null) {
// if the aggregation is distinct, the parameter name is constructed as
// KEY.lastKeyColName:<tag>._colx
paraExpression = Utilities.ReduceField.KEY.name() + "." +
lastKeyColName + ":" + numDistinctUDFs + "."
+ getColumnInternalName(i-1);
}
aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(),
paraExpression, paraExprInfo.getTabAlias(),
paraExprInfo.getIsVirtualCol()));
}
} else {
ColumnInfo paraExprInfo = groupByInputRowResolver.getExpression(value);
if (paraExprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(value));
}
String paraExpression = paraExprInfo.getInternalName();
assert (paraExpression != null);
aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(),
paraExpression, paraExprInfo.getTabAlias(), paraExprInfo
.getIsVirtualCol()));
}
if (isDistinct) {
numDistinctUDFs++;
}
boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
GenericUDAFEvaluator genericUDAFEvaluator = null;
// For distincts, partial aggregations have not been done
if (distPartAgg) {
genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, aggParameters,
value, isDistinct, isAllColumns);
assert (genericUDAFEvaluator != null);
genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator);
} else {
genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey());
assert (genericUDAFEvaluator != null);
}
GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode,
aggParameters);
aggregations.add(new AggregationDesc(aggName.toLowerCase(),
udaf.genericUDAFEvaluator, udaf.convertedParameters,
(mode != GroupByDesc.Mode.FINAL && isDistinct), amode));
String field = getColumnInternalName(groupByKeys.size()
+ aggregations.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(value, new ColumnInfo(
field, udaf.returnType, "", false));
}
float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
distPartAgg,groupByMemoryUsage,memoryThreshold), new RowSchema(groupByOutputRowResolver
.getColumnInfos()), reduceSinkOperatorInfo),
groupByOutputRowResolver);
op.setColumnExprMap(colExprMap);
return op;
}
/**
* Generate the map-side GroupByOperator for the Query Block
* (qb.getParseInfo().getXXX(dest)). The new GroupByOperator will be a child
* of the inputOperatorInfo.
*
* @param mode
* The mode of the aggregation (HASH)
* @param genericUDAFEvaluators
* If not null, this function will store the mapping from Aggregation
* StringTree to the genericUDAFEvaluator in this parameter, so it
* can be used in the next-stage GroupBy aggregations.
* @return the new GroupByOperator
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanMapGroupByOperator(QB qb, String dest,
Operator inputOperatorInfo, GroupByDesc.Mode mode,
Map<String, GenericUDAFEvaluator> genericUDAFEvaluators)
throws SemanticException {
RowResolver groupByInputRowResolver = opParseCtx.get(inputOperatorInfo)
.getRowResolver();
QBParseInfo parseInfo = qb.getParseInfo();
RowResolver groupByOutputRowResolver = new RowResolver();
groupByOutputRowResolver.setIsExprResolver(true);
ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
ArrayList<String> outputColumnNames = new ArrayList<String>();
ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
ExprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr,
groupByInputRowResolver);
groupByKeys.add(grpByExprNode);
String field = getColumnInternalName(i);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(grpbyExpr,
new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false));
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
// If there is a distinctFuncExp, add all parameters to the reduceKeys.
if (!parseInfo.getDistinctFuncExprsForClause(dest).isEmpty()) {
List<ASTNode> list = parseInfo.getDistinctFuncExprsForClause(dest);
int numDistn = 0;
for(ASTNode value: list) {
// 0 is function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode parameter = (ASTNode) value.getChild(i);
if (groupByOutputRowResolver.getExpression(parameter) == null) {
ExprNodeDesc distExprNode = genExprNodeDesc(parameter,
groupByInputRowResolver);
groupByKeys.add(distExprNode);
numDistn++;
String field = getColumnInternalName(grpByExprs.size() + numDistn -
1);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(parameter, new ColumnInfo(
field, distExprNode.getTypeInfo(), "", false));
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
}
}
}
// For each aggregation
HashMap<String, ASTNode> aggregationTrees = parseInfo
.getAggregationExprsForClause(dest);
assert (aggregationTrees != null);
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
String aggName = unescapeIdentifier(value.getChild(0).getText());
ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
// 0 is the function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode paraExpr = (ASTNode) value.getChild(i);
ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr,
groupByInputRowResolver);
aggParameters.add(paraExprNode);
}
boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(
aggName, aggParameters, value, isDistinct, isAllColumns);
assert (genericUDAFEvaluator != null);
GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode,
aggParameters);
aggregations.add(new AggregationDesc(aggName.toLowerCase(),
udaf.genericUDAFEvaluator, udaf.convertedParameters, isDistinct,
amode));
String field = getColumnInternalName(groupByKeys.size()
+ aggregations.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(value, new ColumnInfo(
field, udaf.returnType, "", false));
// Save the evaluator so that it can be used by the next-stage
// GroupByOperators
if (genericUDAFEvaluators != null) {
genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator);
}
}
float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
false,groupByMemoryUsage,memoryThreshold), new RowSchema(groupByOutputRowResolver.getColumnInfos()),
inputOperatorInfo), groupByOutputRowResolver);
op.setColumnExprMap(colExprMap);
return op;
}
/**
* Generate the ReduceSinkOperator for the Group By Query Block
* (qb.getParseInfo().getXXX(dest)). The new ReduceSinkOperator will be a child
* of inputOperatorInfo.
*
* It will put all Group By keys and the distinct field (if any) in the
* map-reduce sort key, and all other fields in the map-reduce value.
*
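* For example (an illustrative query, not taken from the surrounding code),
* for SELECT k, count(DISTINCT v), sum(a) FROM t GROUP BY k the sort key
* becomes (k, v) and the value carries a.
*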
* @param numPartitionFields
* the number of fields for map-reduce partitioning. This is usually
* the number of fields in the Group By keys.
* @return the new ReduceSinkOperator.
* @throws SemanticException
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanReduceSinkOperator(QB qb, String dest,
Operator inputOperatorInfo, int numPartitionFields, int numReducers,
boolean mapAggrDone) throws SemanticException {
RowResolver reduceSinkInputRowResolver = opParseCtx.get(inputOperatorInfo)
.getRowResolver();
QBParseInfo parseInfo = qb.getParseInfo();
RowResolver reduceSinkOutputRowResolver = new RowResolver();
reduceSinkOutputRowResolver.setIsExprResolver(true);
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
ArrayList<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>();
// Pre-compute group-by keys and store in reduceKeys
List<String> outputKeyColumnNames = new ArrayList<String>();
List<String> outputValueColumnNames = new ArrayList<String>();
List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
ExprNodeDesc inputExpr = genExprNodeDesc(grpbyExpr,
reduceSinkInputRowResolver);
reduceKeys.add(inputExpr);
if (reduceSinkOutputRowResolver.getExpression(grpbyExpr) == null) {
outputKeyColumnNames.add(getColumnInternalName(reduceKeys.size() - 1));
String field = Utilities.ReduceField.KEY.toString() + "."
+ getColumnInternalName(reduceKeys.size() - 1);
ColumnInfo colInfo = new ColumnInfo(field, reduceKeys.get(
reduceKeys.size() - 1).getTypeInfo(), null, false);
reduceSinkOutputRowResolver.putExpression(grpbyExpr, colInfo);
colExprMap.put(colInfo.getInternalName(), inputExpr);
} else {
throw new SemanticException(ErrorMsg.DUPLICATE_GROUPBY_KEY
.getMsg(grpbyExpr));
}
}
List<List<Integer>> distinctColIndices = new ArrayList<List<Integer>>();
// If there is a distinctFuncExp, add all parameters to the reduceKeys.
if (!parseInfo.getDistinctFuncExprsForClause(dest).isEmpty()) {
List<ASTNode> distFuncs = parseInfo.getDistinctFuncExprsForClause(dest);
String colName = getColumnInternalName(reduceKeys.size());
outputKeyColumnNames.add(colName);
for (int i = 0; i < distFuncs.size(); i++) {
ASTNode value = distFuncs.get(i);
int numExprs = 0;
List<Integer> distinctIndices = new ArrayList<Integer>();
// 0 is function name
for (int j = 1; j < value.getChildCount(); j++) {
ASTNode parameter = (ASTNode) value.getChild(j);
ExprNodeDesc expr = genExprNodeDesc(parameter, reduceSinkInputRowResolver);
// see if expr is already present in reduceKeys.
// get index of expr in reduceKeys
int ri;
for (ri = 0; ri < reduceKeys.size(); ri++) {
if (reduceKeys.get(ri).getExprString().equals(expr.getExprString())) {
break;
}
}
// add the expr to reduceKeys if it is not present
if (ri == reduceKeys.size()) {
reduceKeys.add(expr);
}
// add the index of expr in reduceKeys to distinctIndices
distinctIndices.add(ri);
String name = getColumnInternalName(numExprs);
String field = Utilities.ReduceField.KEY.toString() + "." + colName
+ ":" + i
+ "." + name;
ColumnInfo colInfo = new ColumnInfo(field, expr.getTypeInfo(), null, false);
reduceSinkOutputRowResolver.putExpression(parameter, colInfo);
numExprs++;
}
distinctColIndices.add(distinctIndices);
}
}
ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
HashMap<String, ASTNode> aggregationTrees = parseInfo
.getAggregationExprsForClause(dest);
if (!mapAggrDone) {
// Put parameters to aggregations in reduceValues
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
// 0 is function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode parameter = (ASTNode) value.getChild(i);
if (reduceSinkOutputRowResolver.getExpression(parameter) == null) {
reduceValues.add(genExprNodeDesc(parameter,
reduceSinkInputRowResolver));
outputValueColumnNames
.add(getColumnInternalName(reduceValues.size() - 1));
String field = Utilities.ReduceField.VALUE.toString() + "."
+ getColumnInternalName(reduceValues.size() - 1);
reduceSinkOutputRowResolver.putExpression(parameter, new ColumnInfo(field,
reduceValues.get(reduceValues.size() - 1).getTypeInfo(), null,
false));
}
}
}
} else {
// Put partial aggregation results in reduceValues
int inputField = reduceKeys.size();
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
TypeInfo type = reduceSinkInputRowResolver.getColumnInfos().get(
inputField).getType();
reduceValues.add(new ExprNodeColumnDesc(type,
getColumnInternalName(inputField), "", false));
inputField++;
outputValueColumnNames.add(getColumnInternalName(reduceValues.size() - 1));
String field = Utilities.ReduceField.VALUE.toString() + "."
+ getColumnInternalName(reduceValues.size() - 1);
reduceSinkOutputRowResolver.putExpression(entry.getValue(),
new ColumnInfo(field, type, null, false));
}
}
ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(
OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys,
grpByExprs.size(), reduceValues, distinctColIndices,
outputKeyColumnNames, outputValueColumnNames, true, -1, numPartitionFields,
numReducers), new RowSchema(reduceSinkOutputRowResolver
.getColumnInfos()), inputOperatorInfo), reduceSinkOutputRowResolver);
rsOp.setColumnExprMap(colExprMap);
return rsOp;
}
/**
* Generate the second ReduceSinkOperator for the Group By Plan
* (parseInfo.getXXX(dest)). The new ReduceSinkOperator will be a child of
* groupByOperatorInfo.
*
* The second ReduceSinkOperator will put the group by keys in the map-reduce
* sort key, and put the partial aggregation results in the map-reduce value.
*
* @param numPartitionFields
* the number of fields in the map-reduce partition key. This should
* always be the same as the number of Group By keys. We should be
* able to remove this parameter since in this phase there is no
* distinct any more.
* @return the new ReduceSinkOperator.
* @throws SemanticException
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanReduceSinkOperator2MR(QBParseInfo parseInfo,
String dest, Operator groupByOperatorInfo, int numPartitionFields,
int numReducers) throws SemanticException {
RowResolver reduceSinkInputRowResolver2 = opParseCtx.get(
groupByOperatorInfo).getRowResolver();
RowResolver reduceSinkOutputRowResolver2 = new RowResolver();
reduceSinkOutputRowResolver2.setIsExprResolver(true);
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
ArrayList<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>();
ArrayList<String> outputColumnNames = new ArrayList<String>();
// Get group-by keys and store in reduceKeys
List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
String field = getColumnInternalName(i);
outputColumnNames.add(field);
TypeInfo typeInfo = reduceSinkInputRowResolver2.getExpression(
grpbyExpr).getType();
ExprNodeColumnDesc inputExpr = new ExprNodeColumnDesc(typeInfo, field,
"", false);
reduceKeys.add(inputExpr);
ColumnInfo colInfo = new ColumnInfo(Utilities.ReduceField.KEY.toString()
+ "." + field, typeInfo, "", false);
reduceSinkOutputRowResolver2.putExpression(grpbyExpr, colInfo);
colExprMap.put(colInfo.getInternalName(), inputExpr);
}
// Get partial aggregation results and store in reduceValues
ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
int inputField = reduceKeys.size();
HashMap<String, ASTNode> aggregationTrees = parseInfo
.getAggregationExprsForClause(dest);
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
String field = getColumnInternalName(inputField);
ASTNode t = entry.getValue();
TypeInfo typeInfo = reduceSinkInputRowResolver2.getExpression(t)
.getType();
reduceValues.add(new ExprNodeColumnDesc(typeInfo, field, "", false));
inputField++;
String col = getColumnInternalName(reduceValues.size() - 1);
outputColumnNames.add(col);
reduceSinkOutputRowResolver2.putExpression(t, new ColumnInfo(
Utilities.ReduceField.VALUE.toString() + "." + col, typeInfo, "",
false));
}
ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(
OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys,
reduceValues, outputColumnNames, true, -1, numPartitionFields,
numReducers), new RowSchema(reduceSinkOutputRowResolver2
.getColumnInfos()), groupByOperatorInfo),
reduceSinkOutputRowResolver2);
rsOp.setColumnExprMap(colExprMap);
return rsOp;
}
/**
* Generate the second GroupByOperator for the Group By Plan
* (parseInfo.getXXX(dest)). The new GroupByOperator will do the second
* aggregation based on the partial aggregation results.
*
* @param mode
* the mode of aggregation (FINAL)
* @param genericUDAFEvaluators
* The mapping from Aggregation StringTree to the
* genericUDAFEvaluator.
* @return the new GroupByOperator
* @throws SemanticException
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator2MR(QBParseInfo parseInfo,
String dest, Operator reduceSinkOperatorInfo2, GroupByDesc.Mode mode,
Map<String, GenericUDAFEvaluator> genericUDAFEvaluators)
throws SemanticException {
RowResolver groupByInputRowResolver2 = opParseCtx.get(
reduceSinkOperatorInfo2).getRowResolver();
RowResolver groupByOutputRowResolver2 = new RowResolver();
groupByOutputRowResolver2.setIsExprResolver(true);
ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
ArrayList<String> outputColumnNames = new ArrayList<String>();
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
ColumnInfo exprInfo = groupByInputRowResolver2.getExpression(grpbyExpr);
if (exprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
}
String expression = exprInfo.getInternalName();
groupByKeys.add(new ExprNodeColumnDesc(exprInfo.getType(), expression,
exprInfo.getTabAlias(), exprInfo.getIsVirtualCol()));
String field = getColumnInternalName(i);
outputColumnNames.add(field);
groupByOutputRowResolver2.putExpression(grpbyExpr,
new ColumnInfo(field, exprInfo.getType(), "", false));
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
HashMap<String, ASTNode> aggregationTrees = parseInfo
.getAggregationExprsForClause(dest);
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
ASTNode value = entry.getValue();
ColumnInfo paraExprInfo = groupByInputRowResolver2.getExpression(value);
if (paraExprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(value));
}
String paraExpression = paraExprInfo.getInternalName();
assert (paraExpression != null);
aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(),
paraExpression, paraExprInfo.getTabAlias(), paraExprInfo
.getIsVirtualCol()));
String aggName = value.getChild(0).getText();
boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
boolean isStar = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
GenericUDAFEvaluator genericUDAFEvaluator = genericUDAFEvaluators
.get(entry.getKey());
assert (genericUDAFEvaluator != null);
GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode,
aggParameters);
aggregations
.add(new AggregationDesc(
aggName.toLowerCase(),
udaf.genericUDAFEvaluator,
udaf.convertedParameters,
(mode != GroupByDesc.Mode.FINAL && value.getToken().getType() ==
HiveParser.TOK_FUNCTIONDI),
amode));
String field = getColumnInternalName(groupByKeys.size()
+ aggregations.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver2.putExpression(value, new ColumnInfo(
field, udaf.returnType, "", false));
}
float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
false,groupByMemoryUsage,memoryThreshold), new RowSchema(groupByOutputRowResolver2.getColumnInfos()),
reduceSinkOperatorInfo2), groupByOutputRowResolver2);
op.setColumnExprMap(colExprMap);
return op;
}
/**
* Generate a Group-By plan using a single map-reduce job (3 operators will be
* inserted):
*
* ReduceSink ( keys = (K1_EXP, K2_EXP, DISTINCT_EXP), values = (A1_EXP, A2_EXP) )
* SortGroupBy (keys = (KEY.0, KEY.1), aggregations = (count_distinct(KEY.2),
*   sum(VALUE.0), count(VALUE.1)))
* Select (final selects).
*
* @param dest
* @param qb
* @param input
* @return
* @throws SemanticException
*
* Generate a Group-By plan using 1 map-reduce job. Spray by the
* group by key, and sort by the distinct key (if any), and compute
* aggregates.
*
* The aggregation evaluation functions are as follows:
*
* Partitioning Key: grouping key
*
* Sorting Key: grouping key if no DISTINCT grouping + distinct key
* if DISTINCT
*
* Reducer: iterate/merge (mode = COMPLETE)
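*
* For example (illustrative), SELECT k, sum(a), count(DISTINCT v) FROM t
* GROUP BY k is evaluated in a single map-reduce job: rows are partitioned by
* k, sorted by (k, v), and the reducer computes both aggregations in COMPLETE
* mode.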
**/
@SuppressWarnings({"nls"})
private Operator genGroupByPlan1MR(String dest, QB qb, Operator input)
throws SemanticException {
QBParseInfo parseInfo = qb.getParseInfo();
int numReducers = -1;
List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
if (grpByExprs.isEmpty()) {
numReducers = 1;
}
// ////// 1. Generate ReduceSinkOperator
Operator reduceSinkOperatorInfo = genGroupByPlanReduceSinkOperator(qb,
dest, input, grpByExprs.size(), numReducers, false);
// ////// 2. Generate GroupbyOperator
Operator groupByOperatorInfo = genGroupByPlanGroupByOperator(parseInfo,
dest, reduceSinkOperatorInfo, GroupByDesc.Mode.COMPLETE, null);
return groupByOperatorInfo;
}
static ArrayList<GenericUDAFEvaluator> getUDAFEvaluators(
ArrayList<AggregationDesc> aggs) {
ArrayList<GenericUDAFEvaluator> result = new ArrayList<GenericUDAFEvaluator>();
for (int i = 0; i < aggs.size(); i++) {
result.add(aggs.get(i).getGenericUDAFEvaluator());
}
return result;
}
/**
* Generate a multi Group-By plan using 2 map-reduce jobs.
*
* @param dest
* @param qb
* @param input
* @return
* @throws SemanticException
*
* Generate a Group-By plan using 2 map-reduce jobs. Spray by the
* distinct key in the hope of getting a uniform distribution, and
* compute partial aggregates grouped by the grouping key. Evaluate partial
* aggregates first, and spray by the grouping key to compute actual
* aggregates in the second phase. The aggregation evaluation
* functions are as follows: Partitioning Key: distinct key
*
* Sorting Key: distinct key
*
* Reducer: iterate/terminatePartial (mode = PARTIAL1)
*
* STAGE 2
*
* Partitioning Key: grouping key
*
* Sorting Key: grouping key
*
* Reducer: merge/terminate (mode = FINAL)
*/
@SuppressWarnings("nls")
private Operator genGroupByPlan2MRMultiGroupBy(String dest, QB qb,
Operator input) throws SemanticException {
// ////// Generate GroupbyOperator for a map-side partial aggregation
Map<String, GenericUDAFEvaluator> genericUDAFEvaluators =
new LinkedHashMap<String, GenericUDAFEvaluator>();
QBParseInfo parseInfo = qb.getParseInfo();
// ////// 2. Generate GroupbyOperator
Operator groupByOperatorInfo = genGroupByPlanGroupByOperator1(parseInfo,
dest, input, GroupByDesc.Mode.HASH, genericUDAFEvaluators, true);
int numReducers = -1;
List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
// ////// 3. Generate ReduceSinkOperator2
Operator reduceSinkOperatorInfo2 = genGroupByPlanReduceSinkOperator2MR(
parseInfo, dest, groupByOperatorInfo, grpByExprs.size(), numReducers);
// ////// 4. Generate GroupbyOperator2
Operator groupByOperatorInfo2 = genGroupByPlanGroupByOperator2MR(parseInfo,
dest, reduceSinkOperatorInfo2, GroupByDesc.Mode.FINAL,
genericUDAFEvaluators);
return groupByOperatorInfo2;
}
/**
* Generate a Group-By plan using 2 map-reduce jobs (5 operators will be
* inserted):
*
* ReduceSink ( keys = (K1_EXP, K2_EXP, DISTINCT_EXP), values = (A1_EXP, A2_EXP) )
*   NOTE: If DISTINCT_EXP is null, partition by rand()
* SortGroupBy (keys = (KEY.0, KEY.1), aggregations = (count_distinct(KEY.2),
*   sum(VALUE.0), count(VALUE.1)))
* ReduceSink ( keys = (0,1), values = (2,3,4))
* SortGroupBy (keys = (KEY.0, KEY.1), aggregations = (sum(VALUE.0),
*   sum(VALUE.1), sum(VALUE.2)))
* Select (final selects).
*
* @param dest
* @param qb
* @param input
* @return
* @throws SemanticException
*
* Generate a Group-By plan using 2 map-reduce jobs. Spray by the
* grouping key and distinct key (or a random number, if no distinct
* is present) in the hope of getting a uniform distribution, and
* compute partial aggregates grouped by the reduction key (grouping
* key + distinct key). Evaluate partial aggregates first, and spray
* by the grouping key to compute actual aggregates in the second
* phase. The aggregation evaluation functions are as follows:
* Partitioning Key: random() if no DISTINCT grouping + distinct key
* if DISTINCT
*
* Sorting Key: grouping key if no DISTINCT grouping + distinct key
* if DISTINCT
*
* Reducer: iterate/terminatePartial (mode = PARTIAL1)
*
* STAGE 2
*
* Partitioning Key: grouping key
*
* Sorting Key: grouping key if no DISTINCT grouping + distinct key
* if DISTINCT
*
* Reducer: merge/terminate (mode = FINAL)
*/
@SuppressWarnings("nls")
private Operator genGroupByPlan2MR(String dest, QB qb, Operator input)
throws SemanticException {
QBParseInfo parseInfo = qb.getParseInfo();
// ////// 1. Generate ReduceSinkOperator
// There is a special case when we want the rows to be randomly distributed
// to reducers for load balancing. That happens when there is no DISTINCT
// operator. We set the numPartitionColumns to -1 for this purpose. This is
// captured by the WritableComparableHiveObject.hashCode() function.
Operator reduceSinkOperatorInfo = genGroupByPlanReduceSinkOperator(qb,
dest, input, (parseInfo.getDistinctFuncExprsForClause(dest).isEmpty() ?
-1 : Integer.MAX_VALUE), -1, false);
// ////// 2. Generate GroupbyOperator
Map<String, GenericUDAFEvaluator> genericUDAFEvaluators =
new LinkedHashMap<String, GenericUDAFEvaluator>();
GroupByOperator groupByOperatorInfo = (GroupByOperator) genGroupByPlanGroupByOperator(
parseInfo, dest, reduceSinkOperatorInfo, GroupByDesc.Mode.PARTIAL1,
genericUDAFEvaluators);
int numReducers = -1;
List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
if (grpByExprs.isEmpty()) {
numReducers = 1;
}
// ////// 3. Generate ReduceSinkOperator2
Operator reduceSinkOperatorInfo2 = genGroupByPlanReduceSinkOperator2MR(
parseInfo, dest, groupByOperatorInfo, grpByExprs.size(), numReducers);
// ////// 4. Generate GroupbyOperator2
Operator groupByOperatorInfo2 = genGroupByPlanGroupByOperator2MR(parseInfo,
dest, reduceSinkOperatorInfo2, GroupByDesc.Mode.FINAL,
genericUDAFEvaluators);
return groupByOperatorInfo2;
}
private boolean optimizeMapAggrGroupBy(String dest, QB qb) {
List<ASTNode> grpByExprs = getGroupByForClause(qb.getParseInfo(), dest);
if ((grpByExprs != null) && !grpByExprs.isEmpty()) {
return false;
}
if (!qb.getParseInfo().getDistinctFuncExprsForClause(dest).isEmpty()) {
return false;
}
return true;
}
/**
* Generate a Group-By plan using 1 map-reduce job. First perform a map-side
* partial aggregation (to reduce the amount of data); map-side partial
* aggregation may later be turned off based on its performance. Then
* spray by the group by key, and sort by the distinct key (if any), and
* compute aggregates based on actual aggregates
*
* The aggregation evaluation functions are as follows: Mapper:
* iterate/terminatePartial (mode = HASH)
*
* Partitioning Key: grouping key
*
* Sorting Key: grouping key if no DISTINCT grouping + distinct key if
* DISTINCT
*
* Reducer: iterate/terminate if DISTINCT; merge/terminate if NO DISTINCT (mode
* = MERGEPARTIAL)
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanMapAggr1MR(String dest, QB qb,
Operator inputOperatorInfo) throws SemanticException {
QBParseInfo parseInfo = qb.getParseInfo();
// ////// Generate GroupbyOperator for a map-side partial aggregation
Map<String, GenericUDAFEvaluator> genericUDAFEvaluators =
new LinkedHashMap<String, GenericUDAFEvaluator>();
GroupByOperator groupByOperatorInfo = (GroupByOperator) genGroupByPlanMapGroupByOperator(
qb, dest, inputOperatorInfo, GroupByDesc.Mode.HASH,
genericUDAFEvaluators);
groupOpToInputTables.put(groupByOperatorInfo, opParseCtx.get(
inputOperatorInfo).getRowResolver().getTableNames());
int numReducers = -1;
// Optimize the scenario when there are no grouping keys - only 1 reducer is
// needed
List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
if (grpByExprs.isEmpty()) {
numReducers = 1;
}
// ////// Generate ReduceSink Operator
Operator reduceSinkOperatorInfo = genGroupByPlanReduceSinkOperator(qb,
dest, groupByOperatorInfo, grpByExprs.size(), numReducers, true);
// This is a 1-stage map-reduce processing of the group by. The map-side
// aggregation was just used to reduce output data. In case of distincts,
// partial results are not used, and so iterate is again invoked on the
// reducer. In case of non-distincts, partial results are used, and merge is
// invoked on the reducer.
return genGroupByPlanGroupByOperator1(parseInfo, dest,
reduceSinkOperatorInfo, GroupByDesc.Mode.MERGEPARTIAL,
genericUDAFEvaluators, false);
}
/**
* Generate a Group-By plan using 2 map-reduce jobs. However, only 1
* group-by plan is generated if the query involves no grouping key and no
* distincts. In that case, the plan is the same as that generated by
* genGroupByPlanMapAggr1MR. Otherwise, the following plan is generated: First
* perform a map side partial aggregation (to reduce the amount of data). Then
* spray by the grouping key and distinct key (or a random number, if no
* distinct is present) in the hope of getting a uniform distribution, and compute
* partial aggregates grouped by the reduction key (grouping key + distinct
* key). Evaluate partial aggregates first, and spray by the grouping key to
* compute actual aggregates in the second phase. The aggregation evaluation
* functions are as follows: Mapper: iterate/terminatePartial (mode = HASH)
*
* Partitioning Key: random() if no DISTINCT grouping + distinct key if
* DISTINCT
*
* Sorting Key: grouping key if no DISTINCT grouping + distinct key if
* DISTINCT
*
* Reducer: iterate/terminatePartial if DISTINCT; merge/terminatePartial if NO
* DISTINCT (mode = MERGEPARTIAL)
*
* STAGE 2
*
* Partitioning Key: grouping key
*
* Sorting Key: grouping key if no DISTINCT grouping + distinct key if
* DISTINCT
*
* Reducer: merge/terminate (mode = FINAL)
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanMapAggr2MR(String dest, QB qb,
Operator inputOperatorInfo) throws SemanticException {
QBParseInfo parseInfo = qb.getParseInfo();
// ////// Generate GroupbyOperator for a map-side partial aggregation
Map<String, GenericUDAFEvaluator> genericUDAFEvaluators =
new LinkedHashMap<String, GenericUDAFEvaluator>();
GroupByOperator groupByOperatorInfo = (GroupByOperator) genGroupByPlanMapGroupByOperator(
qb, dest, inputOperatorInfo, GroupByDesc.Mode.HASH,
genericUDAFEvaluators);
groupOpToInputTables.put(groupByOperatorInfo, opParseCtx.get(
inputOperatorInfo).getRowResolver().getTableNames());
// Optimize the scenario when there are no grouping keys and no distinct - 2
// map-reduce jobs are not needed
// For example: select count(1) from T where t.ds = ....
if (!optimizeMapAggrGroupBy(dest, qb)) {
// ////// Generate ReduceSink Operator
Operator reduceSinkOperatorInfo = genGroupByPlanReduceSinkOperator(qb,
dest, groupByOperatorInfo, (parseInfo
.getDistinctFuncExprsForClause(dest).isEmpty() ? -1
: Integer.MAX_VALUE), -1, true);
// ////// Generate GroupbyOperator for a partial aggregation
Operator groupByOperatorInfo2 = genGroupByPlanGroupByOperator1(parseInfo,
dest, reduceSinkOperatorInfo, GroupByDesc.Mode.PARTIALS,
genericUDAFEvaluators, false);
int numReducers = -1;
List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
if (grpByExprs.isEmpty()) {
numReducers = 1;
}
// ////// Generate ReduceSinkOperator2
Operator reduceSinkOperatorInfo2 = genGroupByPlanReduceSinkOperator2MR(
parseInfo, dest, groupByOperatorInfo2, grpByExprs.size(), numReducers);
// ////// Generate GroupbyOperator3
return genGroupByPlanGroupByOperator2MR(parseInfo, dest,
reduceSinkOperatorInfo2, GroupByDesc.Mode.FINAL,
genericUDAFEvaluators);
} else {
// ////// Generate ReduceSink Operator
Operator reduceSinkOperatorInfo = genGroupByPlanReduceSinkOperator(qb,
dest, groupByOperatorInfo, getGroupByForClause(parseInfo, dest)
.size(), 1, true);
return genGroupByPlanGroupByOperator2MR(parseInfo, dest,
reduceSinkOperatorInfo, GroupByDesc.Mode.FINAL, genericUDAFEvaluators);
}
}
@SuppressWarnings("nls")
private Operator genConversionOps(String dest, QB qb, Operator input)
throws SemanticException {
Integer dest_type = qb.getMetaData().getDestTypeForAlias(dest);
switch (dest_type.intValue()) {
case QBMetaData.DEST_TABLE: {
qb.getMetaData().getDestTableForAlias(dest);
break;
}
case QBMetaData.DEST_PARTITION: {
qb.getMetaData().getDestPartitionForAlias(dest).getTable();
break;
}
default: {
return input;
}
}
return input;
}
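/**
* Given the total number of bucket files to produce and a cap on the number
* of reducers, return the largest reducer count that evenly divides
* totalFiles without exceeding maxReducers; each reducer then writes
* totalFiles/reducers bucket files.
*/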
private int getReducersBucketing(int totalFiles, int maxReducers) {
// use ceiling division so that the returned reducer count never exceeds maxReducers
int numFiles = (int) Math.ceil((double) totalFiles / (double) maxReducers);
while (true) {
if (totalFiles%numFiles == 0) {
return totalFiles/numFiles;
}
numFiles++;
}
}
private static class SortBucketRSCtx {
ArrayList<ExprNodeDesc> partnCols;
boolean multiFileSpray;
int numFiles;
int totalFiles;
public SortBucketRSCtx() {
partnCols = null;
multiFileSpray = false;
numFiles = 1;
totalFiles = 1;
}
/**
* @return the partnCols
*/
public ArrayList<ExprNodeDesc> getPartnCols() {
return partnCols;
}
/**
* @param partnCols the partnCols to set
*/
public void setPartnCols(ArrayList<ExprNodeDesc> partnCols) {
this.partnCols = partnCols;
}
/**
* @return the multiFileSpray
*/
public boolean isMultiFileSpray() {
return multiFileSpray;
}
/**
* @param multiFileSpray the multiFileSpray to set
*/
public void setMultiFileSpray(boolean multiFileSpray) {
this.multiFileSpray = multiFileSpray;
}
/**
* @return the numFiles
*/
public int getNumFiles() {
return numFiles;
}
/**
* @param numFiles the numFiles to set
*/
public void setNumFiles(int numFiles) {
this.numFiles = numFiles;
}
/**
* @return the totalFiles
*/
public int getTotalFiles() {
return totalFiles;
}
/**
* @param totalFiles the totalFiles to set
*/
public void setTotalFiles(int totalFiles) {
this.totalFiles = totalFiles;
}
}
@SuppressWarnings("nls")
private Operator genBucketingSortingDest(String dest, Operator input, QB qb, TableDesc table_desc,
Table dest_tab, SortBucketRSCtx ctx)
throws SemanticException {
// If the table is bucketed, and bucketing is enforced, do the following:
// If the number of buckets is smaller than the maximum number of reducers,
// create that many reducers.
// If not, create a multiFileSink instead of a FileSink - the multiFileSink will
// spray the data into multiple buckets. That way, we can support a very large
// number of buckets without needing a very large number of reducers.
boolean enforceBucketing = false;
boolean enforceSorting = false;
ArrayList<ExprNodeDesc> partnCols = new ArrayList<ExprNodeDesc>();
ArrayList<ExprNodeDesc> partnColsNoConvert = new ArrayList<ExprNodeDesc>();
ArrayList<ExprNodeDesc> sortCols = new ArrayList<ExprNodeDesc>();
boolean multiFileSpray = false;
int numFiles = 1;
int totalFiles = 1;
if ((dest_tab.getNumBuckets() > 0) &&
(conf.getBoolVar(HiveConf.ConfVars.HIVEENFORCEBUCKETING))) {
enforceBucketing = true;
partnCols = getParitionColsFromBucketCols(dest, qb, dest_tab, table_desc, input, true);
partnColsNoConvert = getParitionColsFromBucketCols(dest, qb, dest_tab, table_desc, input, false);
}
if ((dest_tab.getSortCols() != null) &&
(dest_tab.getSortCols().size() > 0) &&
(conf.getBoolVar(HiveConf.ConfVars.HIVEENFORCESORTING))) {
enforceSorting = true;
sortCols = getSortCols(dest, qb, dest_tab, table_desc, input, true);
if (!enforceBucketing) {
partnCols = sortCols;
partnColsNoConvert = getSortCols(dest, qb, dest_tab, table_desc, input, false);
}
}
if (enforceBucketing || enforceSorting) {
int maxReducers = conf.getIntVar(HiveConf.ConfVars.MAXREDUCERS);
int numBuckets = dest_tab.getNumBuckets();
if (numBuckets > maxReducers) {
multiFileSpray = true;
totalFiles = numBuckets;
if (totalFiles % maxReducers == 0) {
numFiles = totalFiles / maxReducers;
}
else {
// find the number of reducers such that it is a divisor of totalFiles
maxReducers = getReducersBucketing(totalFiles, maxReducers);
numFiles = totalFiles/maxReducers;
}
}
else {
maxReducers = numBuckets;
}
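// Illustrative example (assumed numbers): a table with 96 buckets and a
// reducer cap of 10 sprays through 8 reducers (the largest divisor of 96
// that is <= 10), each reducer writing numFiles = 12 bucket files.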
input = genReduceSinkPlanForSortingBucketing(dest_tab, input, sortCols, partnCols, maxReducers);
ctx.setMultiFileSpray(multiFileSpray);
ctx.setNumFiles(numFiles);
ctx.setPartnCols(partnColsNoConvert);
ctx.setTotalFiles(totalFiles);
//disable "merge mapfiles" and "merge mapred files".
HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVEMERGEMAPFILES, false);
HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVEMERGEMAPREDFILES, false);
}
return input;
}
/**
* Check for HOLD_DDLTIME hint.
* @param qb
* @return true if HOLD_DDLTIME is set, false otherwise.
*/
private boolean checkHoldDDLTime(QB qb) {
ASTNode hints = qb.getParseInfo().getHints();
if (hints == null) {
return false;
}
for (int pos = 0; pos < hints.getChildCount(); pos++) {
ASTNode hint = (ASTNode) hints.getChild(pos);
if (((ASTNode) hint.getChild(0)).getToken().getType() == HiveParser.TOK_HOLD_DDLTIME) {
return true;
}
}
return false;
}
@SuppressWarnings("nls")
private Operator genFileSinkPlan(String dest, QB qb, Operator input)
throws SemanticException {
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
QBMetaData qbm = qb.getMetaData();
Integer dest_type = qbm.getDestTypeForAlias(dest);
Table dest_tab = null; // destination table if any
Partition dest_part = null;// destination partition if any
String queryTmpdir = null; // the intermediate destination directory
Path dest_path = null; // the final destination directory
TableDesc table_desc = null;
int currentTableId = 0;
boolean isLocal = false;
SortBucketRSCtx rsCtx = new SortBucketRSCtx();
DynamicPartitionCtx dpCtx = null;
LoadTableDesc ltd = null;
boolean holdDDLTime = checkHoldDDLTime(qb);
switch (dest_type.intValue()) {
case QBMetaData.DEST_TABLE: {
dest_tab = qbm.getDestTableForAlias(dest);
Map<String, String> partSpec = qbm.getPartSpecForAlias(dest);
dest_path = dest_tab.getPath();
// check for partition
List<FieldSchema> parts = dest_tab.getPartitionKeys();
if (parts != null && parts.size() > 0) { // table is partitioned
if (partSpec== null || partSpec.size() == 0) { // user did NOT specify partition
throw new SemanticException(ErrorMsg.NEED_PARTITION_ERROR.getMsg());
}
// the HOLD_DDLTIME hint should not be used with dynamic partitions since the
// newly generated partitions should always update their DDLTIME
if (holdDDLTime) {
throw new SemanticException(ErrorMsg.HOLD_DDLTIME_ON_NONEXIST_PARTITIONS.getMsg());
}
dpCtx = qbm.getDPCtx(dest);
if (dpCtx == null) {
Utilities.validatePartSpec(dest_tab, partSpec);
dpCtx = new DynamicPartitionCtx(dest_tab, partSpec,
conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME),
conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTSPERNODE));
qbm.setDPCtx(dest, dpCtx);
}
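// Illustrative example (assumed query): INSERT OVERWRITE TABLE t
// PARTITION (ds='2010-01-01', hr) SELECT ..., hr FROM src has one static
// partition column (ds) and one dynamic partition column (hr), so the
// DynamicPartitionCtx records numSPCols = 1 and numDPCols = 1.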
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING)) { // allow DP
if (dpCtx.getNumDPCols() > 0 &&
(HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEMERGEMAPFILES) ||
HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEMERGEMAPREDFILES)) &&
Utilities.supportCombineFileInputFormat() == false) {
// Do not support merge for Hadoop versions (pre-0.20) that do not
// support CombineHiveInputFormat
HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVEMERGEMAPFILES, false);
HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVEMERGEMAPREDFILES, false);
}
// turn on hive.task.progress to update # of partitions created to the JT
HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVEJOBPROGRESS, true);
} else { // the all-static-partition case is handled as QBMetaData.DEST_PARTITION
throw new SemanticException(ErrorMsg.DYNAMIC_PARTITION_DISABLED.getMsg());
}
if (dpCtx.getSPPath() != null) {
dest_path = new Path(dest_tab.getPath(), dpCtx.getSPPath());
}
if ((dest_tab.getNumBuckets() > 0) &&
(conf.getBoolVar(HiveConf.ConfVars.HIVEENFORCEBUCKETING))) {
dpCtx.setNumBuckets(dest_tab.getNumBuckets());
}
}
boolean isNonNativeTable = dest_tab.isNonNative();
if (isNonNativeTable) {
queryTmpdir = dest_path.toUri().getPath();
} else {
queryTmpdir = ctx.getExternalTmpFileURI(dest_path.toUri());
}
if (dpCtx != null) {
// set the root of the temporary path under which the dynamic partition columns will be populated
dpCtx.setRootPath(queryTmpdir);
}
// this table_desc does not contain the partitioning columns
table_desc = Utilities.getTableDesc(dest_tab);
// Add sorting/bucketing if needed
input = genBucketingSortingDest(dest, input, qb, table_desc, dest_tab, rsCtx);
idToTableNameMap.put(String.valueOf(destTableId), dest_tab.getTableName());
currentTableId = destTableId;
destTableId++;
// Create the work for moving the table
// NOTE: specify Dynamic partitions in dest_tab for WriteEntity
if (!isNonNativeTable) {
ltd = new LoadTableDesc(queryTmpdir, ctx.getExternalTmpFileURI(dest_path.toUri()),
table_desc, dpCtx);
if (holdDDLTime) {
LOG.info("this query will not update transient_lastDdlTime!");
ltd.setHoldDDLTime(true);
}
loadTableWork.add(ltd);
}
// Here we only register the whole table for the post-exec hook if no DP is
// present. In the case of DP, we will register the WriteEntity in MoveTask
// when the list of dynamically created partitions is known.
if ((dpCtx == null || dpCtx.getNumDPCols() == 0) &&
!outputs.add(new WriteEntity(dest_tab))) {
throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES
.getMsg(dest_tab.getTableName()));
}
if ((dpCtx != null) && (dpCtx.getNumDPCols() >= 0)) {
// No static partition specified
if (dpCtx.getNumSPCols() == 0) {
outputs.add(new WriteEntity(dest_tab, false));
}
// part of the partition specified
// Create a DummyPartition in this case. Since the metastore does not store partial
// partitions currently, we need to store dummy partitions
else {
try {
String ppath = dpCtx.getSPPath();
ppath = ppath.substring(0, ppath.length()-1);
DummyPartition p =
new DummyPartition(dest_tab,
dest_tab.getDbName() + "@" + dest_tab.getTableName() + "@" + ppath);
outputs.add(new WriteEntity(p, false));
} catch (HiveException e) {
throw new SemanticException(e.getMessage());
}
}
}
break;
}
case QBMetaData.DEST_PARTITION: {
dest_part = qbm.getDestPartitionForAlias(dest);
dest_tab = dest_part.getTable();
Path tabPath = dest_tab.getPath();
Path partPath = dest_part.getPartitionPath();
// if the table is in a different dfs than the partition,
// replace the partition's dfs with the table's dfs.
dest_path = new Path(tabPath.toUri().getScheme(), tabPath.toUri()
.getAuthority(), partPath.toUri().getPath());
if ("har".equalsIgnoreCase(dest_path.toUri().getScheme())) {
throw new SemanticException(ErrorMsg.OVERWRITE_ARCHIVED_PART
.getMsg());
}
queryTmpdir = ctx.getExternalTmpFileURI(dest_path.toUri());
table_desc = Utilities.getTableDesc(dest_tab);
// Add sorting/bucketing if needed
input = genBucketingSortingDest(dest, input, qb, table_desc, dest_tab, rsCtx);
idToTableNameMap.put(String.valueOf(destTableId), dest_tab.getTableName());
currentTableId = destTableId;
destTableId++;
ltd = new LoadTableDesc(queryTmpdir, ctx.getExternalTmpFileURI(dest_path.toUri()),
table_desc, dest_part.getSpec());
if (holdDDLTime) {
try {
Partition part = db.getPartition(dest_tab, dest_part.getSpec(), false);
if (part == null) {
throw new SemanticException(ErrorMsg.HOLD_DDLTIME_ON_NONEXIST_PARTITIONS.getMsg());
}
} catch (HiveException e) {
throw new SemanticException(e);
}
LOG.info("this query will not update transient_lastDdlTime!");
ltd.setHoldDDLTime(true);
}
loadTableWork.add(ltd);
if (!outputs.add(new WriteEntity(dest_part))) {
throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES
.getMsg(dest_tab.getTableName() + "@" + dest_part.getName()));
}
break;
}
case QBMetaData.DEST_LOCAL_FILE:
isLocal = true;
// fall through
case QBMetaData.DEST_DFS_FILE: {
dest_path = new Path(qbm.getDestFileForAlias(dest));
String destStr = dest_path.toString();
if (isLocal) {
// for local directory - we always write to map-red intermediate
// store and then copy to local fs
queryTmpdir = ctx.getMRTmpFileURI();
} else {
// otherwise write to the file system implied by the directory;
// no copy is required. We may want to revisit this policy in the future.
try {
Path qPath = FileUtils.makeQualified(dest_path, conf);
queryTmpdir = ctx.getExternalTmpFileURI(qPath.toUri());
} catch (Exception e) {
throw new SemanticException("Error creating temporary folder on: "
+ dest_path, e);
}
}
String cols = "";
String colTypes = "";
ArrayList<ColumnInfo> colInfos = inputRR.getColumnInfos();
// CTAS case: the file output format and serde are defined by the create
// table command
// rather than taking the default value
List<FieldSchema> field_schemas = null;
CreateTableDesc tblDesc = qb.getTableDesc();
if (tblDesc != null) {
field_schemas = new ArrayList<FieldSchema>();
}
boolean first = true;
for (ColumnInfo colInfo : colInfos) {
String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
if (nm[1] != null) { // non-null column alias
colInfo.setAlias(nm[1]);
}
if (field_schemas != null) {
FieldSchema col = new FieldSchema();
if (nm[1] != null) {
col.setName(unescapeIdentifier(colInfo.getAlias()).toLowerCase()); // remove enclosing backticks
} else {
col.setName(colInfo.getInternalName());
}
col.setType(colInfo.getType().getTypeName());
field_schemas.add(col);
}
if (!first) {
cols = cols.concat(",");
colTypes = colTypes.concat(":");
}
first = false;
cols = cols.concat(colInfo.getInternalName());
// Replace VOID type with string when the output is a temp table or
// local files.
// A VOID type can be generated under the query:
//
// select NULL from tt;
// or
// insert overwrite local directory "abc" select NULL from tt;
//
// where there is no column type to which the NULL value should be
// converted.
//
String tName = colInfo.getType().getTypeName();
if (tName.equals(Constants.VOID_TYPE_NAME)) {
colTypes = colTypes.concat(Constants.STRING_TYPE_NAME);
} else {
colTypes = colTypes.concat(tName);
}
}
// update the create table descriptor with the resulting schema.
if (tblDesc != null) {
tblDesc.setCols(new ArrayList<FieldSchema>(field_schemas));
}
if (!ctx.isMRTmpFileURI(destStr)) {
idToTableNameMap.put(String.valueOf(destTableId), destStr);
currentTableId = destTableId;
destTableId++;
}
boolean isDfsDir = (dest_type.intValue() == QBMetaData.DEST_DFS_FILE);
loadFileWork.add(new LoadFileDesc(queryTmpdir, destStr, isDfsDir, cols,
colTypes));
if (tblDesc == null) {
if (qb.getIsQuery()) {
String fileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
table_desc = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat);
} else {
table_desc = PlanUtils.getDefaultTableDesc(Integer
.toString(Utilities.ctrlaCode), cols, colTypes, false);
}
} else {
table_desc = PlanUtils.getTableDesc(tblDesc, cols, colTypes);
}
if (!outputs.add(new WriteEntity(destStr, !isDfsDir))) {
throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES
.getMsg(destStr));
}
break;
}
default:
throw new SemanticException("Unknown destination type: " + dest_type);
}
input = genConversionSelectOperator(dest, qb, input, table_desc, dpCtx);
inputRR = opParseCtx.get(input).getRowResolver();
ArrayList<ColumnInfo> vecCol = new ArrayList<ColumnInfo>();
try {
StructObjectInspector rowObjectInspector = (StructObjectInspector) table_desc
.getDeserializer().getObjectInspector();
List<? extends StructField> fields = rowObjectInspector
.getAllStructFieldRefs();
for (int i = 0; i < fields.size(); i++) {
vecCol.add(new ColumnInfo(fields.get(i).getFieldName(), TypeInfoUtils
.getTypeInfoFromObjectInspector(fields.get(i)
.getFieldObjectInspector()), "", false));
}
} catch (Exception e) {
throw new SemanticException(e.getMessage());
}
RowSchema fsRS = new RowSchema(vecCol);
FileSinkDesc fileSinkDesc = new FileSinkDesc(
queryTmpdir,
table_desc,
conf.getBoolVar(HiveConf.ConfVars.COMPRESSRESULT),
currentTableId,
rsCtx.isMultiFileSpray(),
rsCtx.getNumFiles(),
rsCtx.getTotalFiles(),
rsCtx.getPartnCols(),
dpCtx);
// set the stats publishing/aggregating key prefix
// the same as directory name. The directory name
// can be changed in the optimizer but the key should not be changed
// it should be the same as the MoveWork's sourceDir.
fileSinkDesc.setStatsAggPrefix(fileSinkDesc.getDirName());
if (dest_part != null) {
try {
String staticSpec = Warehouse.makePartPath(dest_part.getSpec());
fileSinkDesc.setStaticSpec(staticSpec);
} catch (MetaException e) {
throw new SemanticException(e);
}
} else if (dpCtx != null) {
fileSinkDesc.setStaticSpec(dpCtx.getSPPath());
}
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(fileSinkDesc,
fsRS, input), inputRR);
if (ltd != null && SessionState.get() != null) {
SessionState.get().getLineageState()
.mapDirToFop(ltd.getSourceDir(), (FileSinkOperator)output);
}
if (LOG.isDebugEnabled()) {
LOG.debug("Created FileSink Plan for clause: " + dest + "dest_path: "
+ dest_path + " row schema: " + inputRR.toString());
}
return output;
}
/**
* Generate the conversion SelectOperator that converts the columns into the
* types that are expected by the table_desc.
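*
* For example (illustrative): inserting a BIGINT value into an INT column
* wraps the source expression in a cast to the destination type, while a
* mismatched non-primitive destination type cannot be converted and raises a
* SemanticException instead.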
*/
Operator genConversionSelectOperator(String dest, QB qb, Operator input,
TableDesc table_desc, DynamicPartitionCtx dpCtx) throws SemanticException {
StructObjectInspector oi = null;
try {
Deserializer deserializer = table_desc.getDeserializerClass()
.newInstance();
deserializer.initialize(conf, table_desc.getProperties());
oi = (StructObjectInspector) deserializer.getObjectInspector();
} catch (Exception e) {
throw new SemanticException(e);
}
// Check column number
List<? extends StructField> tableFields = oi.getAllStructFieldRefs();
boolean dynPart = HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING);
ArrayList<ColumnInfo> rowFields = opParseCtx.get(input).getRowResolver()
.getColumnInfos();
int inColumnCnt = rowFields.size();
int outColumnCnt = tableFields.size();
if (dynPart && dpCtx != null) {
outColumnCnt += dpCtx.getNumDPCols();
}
if (inColumnCnt != outColumnCnt) {
String reason = "Table " + dest + " has " + outColumnCnt
+ " columns, but query has " + inColumnCnt + " columns.";
throw new SemanticException(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg(
qb.getParseInfo().getDestForClause(dest), reason));
} else if (dynPart && dpCtx != null){
// create the mapping from input ExprNode to dest table DP column
dpCtx.mapInputToDP(rowFields.subList(tableFields.size(), rowFields.size()));
}
// Check column types
boolean converted = false;
int columnNumber = tableFields.size();
ArrayList<ExprNodeDesc> expressions = new ArrayList<ExprNodeDesc>(
columnNumber);
// MetadataTypedColumnsetSerDe does not need type conversions because it
// does the conversion to String by itself.
boolean isMetaDataSerDe = table_desc.getDeserializerClass().equals(
MetadataTypedColumnsetSerDe.class);
boolean isLazySimpleSerDe = table_desc.getDeserializerClass().equals(
LazySimpleSerDe.class);
if (!isMetaDataSerDe) {
// here only deals with non-partition columns. We deal with partition columns next
for (int i = 0; i < columnNumber; i++) {
ObjectInspector tableFieldOI = tableFields.get(i)
.getFieldObjectInspector();
TypeInfo tableFieldTypeInfo = TypeInfoUtils
.getTypeInfoFromObjectInspector(tableFieldOI);
TypeInfo rowFieldTypeInfo = rowFields.get(i).getType();
ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo,
rowFields.get(i).getInternalName(), "", false);
// LazySimpleSerDe can convert any types to String type using
// JSON-format.
if (!tableFieldTypeInfo.equals(rowFieldTypeInfo)
&& !(isLazySimpleSerDe
&& tableFieldTypeInfo.getCategory().equals(Category.PRIMITIVE) && tableFieldTypeInfo
.equals(TypeInfoFactory.stringTypeInfo))) {
// need to do some conversions here
converted = true;
if (tableFieldTypeInfo.getCategory() != Category.PRIMITIVE) {
// cannot convert to complex types
column = null;
} else {
column = TypeCheckProcFactory.DefaultExprProcessor
.getFuncExprNodeDesc(tableFieldTypeInfo.getTypeName(),
column);
}
if (column == null) {
String reason = "Cannot convert column " + i + " from "
+ rowFieldTypeInfo + " to " + tableFieldTypeInfo + ".";
throw new SemanticException(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH
.getMsg(qb.getParseInfo().getDestForClause(dest), reason));
}
}
expressions.add(column);
}
}
// deal with dynamic partition columns: convert ExprNodeDesc type to String??
if (dynPart && dpCtx != null && dpCtx.getNumDPCols() > 0) {
// DP columns starts with tableFields.size()
for (int i = tableFields.size(); i < rowFields.size(); ++i ) {
TypeInfo rowFieldTypeInfo = rowFields.get(i).getType();
ExprNodeDesc column = new ExprNodeColumnDesc(
rowFieldTypeInfo, rowFields.get(i).getInternalName(), "", false);
expressions.add(column);
}
// converted = true; // [TODO]: should we check & convert type to String and set it to true?
}
if (converted) {
// add the select operator
RowResolver rowResolver = new RowResolver();
ArrayList<String> colName = new ArrayList<String>();
for (int i = 0; i < expressions.size(); i++) {
String name = getColumnInternalName(i);
rowResolver.put("", name, new ColumnInfo(name, expressions.get(i)
.getTypeInfo(), "", false));
colName.add(name);
}
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
new SelectDesc(expressions, colName), new RowSchema(rowResolver
.getColumnInfos()), input), rowResolver);
return output;
} else {
// not converted
return input;
}
}
@SuppressWarnings("nls")
private Operator genLimitPlan(String dest, QB qb, Operator input, int limit)
throws SemanticException {
// A map-only job can be optimized - instead of converting it to a
// map-reduce job, we can have another map-only job do the same work and
// avoid the cost of sorting in the map-reduce phase. A better approach
// would be to write into a local file and then have a map-only job.
// Add the limit operator to get the value fields
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
Operator limitMap = putOpInsertMap(OperatorFactory.getAndMakeChild(
new LimitDesc(limit), new RowSchema(inputRR.getColumnInfos()), input),
inputRR);
if (LOG.isDebugEnabled()) {
LOG.debug("Created LimitOperator Plan for clause: " + dest
+ " row schema: " + inputRR.toString());
}
return limitMap;
}
private Operator genUDTFPlan(GenericUDTF genericUDTF,
String outputTableAlias, ArrayList<String> colAliases, QB qb,
Operator input) throws SemanticException {
// No GROUP BY / DISTRIBUTE BY / SORT BY / CLUSTER BY
QBParseInfo qbp = qb.getParseInfo();
if (!qbp.getDestToGroupBy().isEmpty()) {
throw new SemanticException(ErrorMsg.UDTF_NO_GROUP_BY.getMsg());
}
if (!qbp.getDestToDistributeBy().isEmpty()) {
throw new SemanticException(ErrorMsg.UDTF_NO_DISTRIBUTE_BY.getMsg());
}
if (!qbp.getDestToSortBy().isEmpty()) {
throw new SemanticException(ErrorMsg.UDTF_NO_SORT_BY.getMsg());
}
if (!qbp.getDestToClusterBy().isEmpty()) {
throw new SemanticException(ErrorMsg.UDTF_NO_CLUSTER_BY.getMsg());
}
if (!qbp.getAliasToLateralViews().isEmpty()) {
throw new SemanticException(ErrorMsg.UDTF_LATERAL_VIEW.getMsg());
}
if (LOG.isDebugEnabled()) {
LOG.debug("Table alias: " + outputTableAlias + " Col aliases: "
+ colAliases);
}
// Use the RowResolver from the input operator to generate an input
// ObjectInspector that can be used to initialize the UDTF. Then, the
// resulting output object inspector can be used to make the RowResolver
// for the UDTF operator
RowResolver selectRR = opParseCtx.get(input).getRowResolver();
ArrayList<ColumnInfo> inputCols = selectRR.getColumnInfos();
// Create the object inspector for the input columns and initialize the UDTF
ArrayList<String> colNames = new ArrayList<String>();
ObjectInspector[] colOIs = new ObjectInspector[inputCols.size()];
for (int i = 0; i < inputCols.size(); i++) {
colNames.add(inputCols.get(i).getInternalName());
colOIs[i] = TypeInfoUtils
.getStandardWritableObjectInspectorFromTypeInfo(inputCols.get(i)
.getType());
}
StructObjectInspector outputOI = genericUDTF.initialize(colOIs);
// Make sure that the number of column aliases in the AS clause matches
// the number of columns output by the UDTF
int numUdtfCols = outputOI.getAllStructFieldRefs().size();
int numSuppliedAliases = colAliases.size();
if (numUdtfCols != numSuppliedAliases) {
throw new SemanticException(ErrorMsg.UDTF_ALIAS_MISMATCH
.getMsg("expected " + numUdtfCols + " aliases " + "but got "
+ numSuppliedAliases));
}
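// Example (illustrative, assuming a table t with an array column arr):
// SELECT explode(arr) AS (elem) FROM t supplies exactly one alias for the
// single column that explode() produces; supplying two aliases would fail
// the check above.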
// Generate the output column infos / row resolver using internal names.
ArrayList<ColumnInfo> udtfCols = new ArrayList<ColumnInfo>();
Iterator<String> colAliasesIter = colAliases.iterator();
for (StructField sf : outputOI.getAllStructFieldRefs()) {
String colAlias = colAliasesIter.next();
assert (colAlias != null);
// Since the UDTF operator feeds into a LVJ operator that will rename
// all the internal names, we can just use field name from the UDTF's OI
// as the internal name
ColumnInfo col = new ColumnInfo(sf.getFieldName(), TypeInfoUtils
.getTypeInfoFromObjectInspector(sf.getFieldObjectInspector()),
outputTableAlias, false);
udtfCols.add(col);
}
// Create the row resolver for this operator from the output columns
RowResolver out_rwsch = new RowResolver();
for (int i = 0; i < udtfCols.size(); i++) {
out_rwsch.put(outputTableAlias, colAliases.get(i), udtfCols.get(i));
}
// Add the UDTFOperator to the operator DAG
Operator<?> udtf = putOpInsertMap(OperatorFactory.getAndMakeChild(
new UDTFDesc(genericUDTF), new RowSchema(out_rwsch.getColumnInfos()),
input), out_rwsch);
return udtf;
}
@SuppressWarnings("nls")
private Operator genLimitMapRedPlan(String dest, QB qb, Operator input,
int limit, boolean extraMRStep) throws SemanticException {
// A map-only job can be optimized - instead of converting it to a
// map-reduce job, we can have another map-only job do the same work and
// avoid the cost of sorting in the map-reduce phase. A better approach
// would be to write into a local file and then have a map-only job.
// Add the limit operator to get the value fields
Operator curr = genLimitPlan(dest, qb, input, limit);
// if the client did not request an extra map-reduce step, we are done
if (!extraMRStep) {
return curr;
}
// Create a reduceSink operator followed by another limit
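// (each mapper emits at most `limit` rows; the single-reducer pass then
// applies the limit once more, so at most `limit` rows survive globally)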
curr = genReduceSinkPlan(dest, qb, curr, 1);
return genLimitPlan(dest, qb, curr, limit);
}
private ArrayList<ExprNodeDesc> getParitionColsFromBucketCols(String dest, QB qb, Table tab,
TableDesc table_desc, Operator input, boolean convert)
throws SemanticException {
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
List<String> tabBucketCols = tab.getBucketCols();
List<FieldSchema> tabCols = tab.getCols();
// Partition by the bucketing column
List<Integer> posns = new ArrayList<Integer>();
for (String bucketCol : tabBucketCols) {
int pos = 0;
for (FieldSchema tabCol : tabCols) {
if (bucketCol.equals(tabCol.getName())) {
posns.add(pos);
break;
}
pos++;
}
}
return genConvertCol(dest, qb, tab, table_desc, input, posns, convert);
}
private ArrayList<ExprNodeDesc> genConvertCol(String dest, QB qb, Table tab, TableDesc table_desc, Operator input,
List<Integer> posns, boolean convert) throws SemanticException {
StructObjectInspector oi = null;
try {
Deserializer deserializer = table_desc.getDeserializerClass()
.newInstance();
deserializer.initialize(conf, table_desc.getProperties());
oi = (StructObjectInspector) deserializer.getObjectInspector();
} catch (Exception e) {
throw new SemanticException(e);
}
List<? extends StructField> tableFields = oi.getAllStructFieldRefs();
ArrayList<ColumnInfo> rowFields = opParseCtx.get(input).getRowResolver()
.getColumnInfos();
// Check column type
int columnNumber = posns.size();
ArrayList<ExprNodeDesc> expressions = new ArrayList<ExprNodeDesc>(columnNumber);
for (Integer posn: posns) {
ObjectInspector tableFieldOI = tableFields.get(posn).getFieldObjectInspector();
TypeInfo tableFieldTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(tableFieldOI);
TypeInfo rowFieldTypeInfo = rowFields.get(posn).getType();
ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo, rowFields.get(posn).getInternalName(),
rowFields.get(posn).getTabAlias(), rowFields.get(posn).getIsVirtualCol());
if (convert && !tableFieldTypeInfo.equals(rowFieldTypeInfo)) {
// need to do some conversions here
if (tableFieldTypeInfo.getCategory() != Category.PRIMITIVE) {
// cannot convert to complex types
column = null;
} else {
column = TypeCheckProcFactory.DefaultExprProcessor
.getFuncExprNodeDesc(tableFieldTypeInfo.getTypeName(),
column);
}
if (column == null) {
String reason = "Cannot convert column " + posn + " from "
+ rowFieldTypeInfo + " to " + tableFieldTypeInfo + ".";
throw new SemanticException(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH
.getMsg(qb.getParseInfo().getDestForClause(dest), reason));
}
}
expressions.add(column);
}
return expressions;
}
private ArrayList<ExprNodeDesc> getSortCols(String dest, QB qb, Table tab, TableDesc table_desc, Operator input, boolean convert)
throws SemanticException {
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
List<Order> tabSortCols = tab.getSortCols();
List<FieldSchema> tabCols = tab.getCols();
// Sort by the table's sort columns
List<Integer> posns = new ArrayList<Integer>();
for (Order sortCol : tabSortCols) {
int pos = 0;
for (FieldSchema tabCol : tabCols) {
if (sortCol.getCol().equals(tabCol.getName())) {
ColumnInfo colInfo = inputRR.getColumnInfos().get(pos);
posns.add(pos);
break;
}
pos++;
}
}
return genConvertCol(dest, qb, tab, table_desc, input, posns, convert);
}
@SuppressWarnings("nls")
private Operator genReduceSinkPlanForSortingBucketing(Table tab, Operator input,
ArrayList<ExprNodeDesc> sortCols,
ArrayList<ExprNodeDesc> partitionCols,
int numReducers)
throws SemanticException {
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
// For the generation of the values expression, just get the input's
// signature and generate field expressions for those
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
ArrayList<ExprNodeDesc> valueCols = new ArrayList<ExprNodeDesc>();
for (ColumnInfo colInfo : inputRR.getColumnInfos()) {
valueCols.add(new ExprNodeColumnDesc(colInfo.getType(), colInfo
.getInternalName(), colInfo.getTabAlias(), colInfo
.getIsVirtualCol()));
colExprMap.put(colInfo.getInternalName(), valueCols
.get(valueCols.size() - 1));
}
ArrayList<String> outputColumns = new ArrayList<String>();
for (int i = 0; i < valueCols.size(); i++) {
outputColumns.add(getColumnInternalName(i));
}
StringBuilder order = new StringBuilder();
for (int i = 0; i < sortCols.size(); i++) {
order.append("+");
}
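// all keys use "+" (ascending): enforced sorting for bucketed/sorted tables
// always sorts ascending here, regardless of the declared sort order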
Operator interim = putOpInsertMap(OperatorFactory.getAndMakeChild(PlanUtils
.getReduceSinkDesc(sortCols, valueCols, outputColumns, false, -1,
partitionCols, order.toString(), numReducers),
new RowSchema(inputRR.getColumnInfos()), input), inputRR);
interim.setColumnExprMap(colExprMap);
// Add the extract operator to get the value fields
RowResolver out_rwsch = new RowResolver();
RowResolver interim_rwsch = inputRR;
Integer pos = Integer.valueOf(0);
for (ColumnInfo colInfo : interim_rwsch.getColumnInfos()) {
String[] info = interim_rwsch.reverseLookup(colInfo.getInternalName());
out_rwsch.put(info[0], info[1], new ColumnInfo(
getColumnInternalName(pos), colInfo.getType(), info[0],
colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol()));
pos = Integer.valueOf(pos.intValue() + 1);
}
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
new ExtractDesc(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
Utilities.ReduceField.VALUE.toString(), "", false)), new RowSchema(
out_rwsch.getColumnInfos()), interim), out_rwsch);
if (LOG.isDebugEnabled()) {
LOG.debug("Created ReduceSink Plan for table: " + tab.getTableName() +
" row schema: " + out_rwsch.toString());
}
return output;
}
@SuppressWarnings("nls")
private Operator genReduceSinkPlan(String dest, QB qb, Operator input,
int numReducers) throws SemanticException {
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
// First generate the expressions for the partition and sort keys.
// The cluster by clause / distribute by clause supplies the expressions
// used for partitioning.
ASTNode partitionExprs = qb.getParseInfo().getClusterByForClause(dest);
if (partitionExprs == null) {
partitionExprs = qb.getParseInfo().getDistributeByForClause(dest);
}
ArrayList<ExprNodeDesc> partitionCols = new ArrayList<ExprNodeDesc>();
if (partitionExprs != null) {
int ccount = partitionExprs.getChildCount();
for (int i = 0; i < ccount; ++i) {
ASTNode cl = (ASTNode) partitionExprs.getChild(i);
partitionCols.add(genExprNodeDesc(cl, inputRR));
}
}
ASTNode sortExprs = qb.getParseInfo().getClusterByForClause(dest);
if (sortExprs == null) {
sortExprs = qb.getParseInfo().getSortByForClause(dest);
}
if (sortExprs == null) {
sortExprs = qb.getParseInfo().getOrderByForClause(dest);
if (sortExprs != null) {
assert numReducers == 1;
// in strict mode, in the presence of order by, limit must be specified
Integer limit = qb.getParseInfo().getDestLimit(dest);
if (conf.getVar(HiveConf.ConfVars.HIVEMAPREDMODE).equalsIgnoreCase(
"strict")
&& limit == null) {
throw new SemanticException(ErrorMsg.NO_LIMIT_WITH_ORDERBY
.getMsg(sortExprs));
}
}
}
ArrayList<ExprNodeDesc> sortCols = new ArrayList<ExprNodeDesc>();
StringBuilder order = new StringBuilder();
if (sortExprs != null) {
int ccount = sortExprs.getChildCount();
for (int i = 0; i < ccount; ++i) {
ASTNode cl = (ASTNode) sortExprs.getChild(i);
if (cl.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) {
// SortBy ASC
order.append("+");
cl = (ASTNode) cl.getChild(0);
} else if (cl.getType() == HiveParser.TOK_TABSORTCOLNAMEDESC) {
// SortBy DESC
order.append("-");
cl = (ASTNode) cl.getChild(0);
} else {
// ClusterBy
order.append("+");
}
ExprNodeDesc exprNode = genExprNodeDesc(cl, inputRR);
sortCols.add(exprNode);
}
}
// For the generation of the values expression, just get the input's
// signature and generate field expressions for those
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
ArrayList<ExprNodeDesc> valueCols = new ArrayList<ExprNodeDesc>();
for (ColumnInfo colInfo : inputRR.getColumnInfos()) {
valueCols.add(new ExprNodeColumnDesc(colInfo.getType(), colInfo
.getInternalName(), colInfo.getTabAlias(), colInfo
.getIsVirtualCol()));
colExprMap.put(colInfo.getInternalName(), valueCols
.get(valueCols.size() - 1));
}
ArrayList<String> outputColumns = new ArrayList<String>();
for (int i = 0; i < valueCols.size(); i++) {
outputColumns.add(getColumnInternalName(i));
}
Operator interim = putOpInsertMap(OperatorFactory.getAndMakeChild(PlanUtils
.getReduceSinkDesc(sortCols, valueCols, outputColumns, false, -1,
partitionCols, order.toString(), numReducers),
new RowSchema(inputRR.getColumnInfos()), input), inputRR);
interim.setColumnExprMap(colExprMap);
// Add the extract operator to get the value fields
RowResolver out_rwsch = new RowResolver();
RowResolver interim_rwsch = inputRR;
Integer pos = Integer.valueOf(0);
for (ColumnInfo colInfo : interim_rwsch.getColumnInfos()) {
String[] info = interim_rwsch.reverseLookup(colInfo.getInternalName());
out_rwsch.put(info[0], info[1], new ColumnInfo(
getColumnInternalName(pos), colInfo.getType(), info[0],
colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol()));
pos = Integer.valueOf(pos.intValue() + 1);
}
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
new ExtractDesc(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
Utilities.ReduceField.VALUE.toString(), "", false)), new RowSchema(
out_rwsch.getColumnInfos()), interim), out_rwsch);
if (LOG.isDebugEnabled()) {
LOG.debug("Created ReduceSink Plan for clause: " + dest + " row schema: "
+ out_rwsch.toString());
}
return output;
}
private Operator genJoinOperatorChildren(QBJoinTree join, Operator left,
Operator[] right, HashSet<Integer> omitOpts) throws SemanticException {
RowResolver outputRS = new RowResolver();
ArrayList<String> outputColumnNames = new ArrayList<String>();
// all children are base classes
Operator<?>[] rightOps = new Operator[right.length];
int outputPos = 0;
Map<String, Byte> reversedExprs = new HashMap<String, Byte>();
HashMap<Byte, List<ExprNodeDesc>> exprMap = new HashMap<Byte, List<ExprNodeDesc>>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
HashMap<Integer, Set<String>> posToAliasMap = new HashMap<Integer, Set<String>>();
HashMap<Byte, List<ExprNodeDesc>> filterMap =
new HashMap<Byte, List<ExprNodeDesc>>();
for (int pos = 0; pos < right.length; ++pos) {
Operator input = right[pos];
if (input == null) {
input = left;
}
ArrayList<ExprNodeDesc> keyDesc = new ArrayList<ExprNodeDesc>();
ArrayList<ExprNodeDesc> filterDesc = new ArrayList<ExprNodeDesc>();
Byte tag = Byte.valueOf((byte) (((ReduceSinkDesc) (input.getConf()))
.getTag()));
// check whether this input operator produces output
if (omitOpts == null || !omitOpts.contains(pos)) {
// prepare output descriptors for this input operator
RowResolver inputRS = opParseCtx.get(input).getRowResolver();
Iterator<String> keysIter = inputRS.getTableNames().iterator();
Set<String> aliases = posToAliasMap.get(pos);
if (aliases == null) {
aliases = new HashSet<String>();
posToAliasMap.put(pos, aliases);
}
while (keysIter.hasNext()) {
String key = keysIter.next();
aliases.add(key);
HashMap<String, ColumnInfo> map = inputRS.getFieldMap(key);
Iterator<String> fNamesIter = map.keySet().iterator();
while (fNamesIter.hasNext()) {
String field = fNamesIter.next();
ColumnInfo valueInfo = inputRS.get(key, field);
keyDesc.add(new ExprNodeColumnDesc(valueInfo.getType(), valueInfo
.getInternalName(), valueInfo.getTabAlias(), valueInfo
.getIsVirtualCol()));
if (outputRS.get(key, field) == null) {
String colName = getColumnInternalName(outputPos);
outputPos++;
outputColumnNames.add(colName);
colExprMap.put(colName, keyDesc.get(keyDesc.size() - 1));
outputRS.put(key, field, new ColumnInfo(colName, valueInfo
.getType(), key, valueInfo.getIsVirtualCol(), valueInfo
.isHiddenVirtualCol()));
reversedExprs.put(colName, tag);
}
}
}
for (ASTNode cond : join.getFilters().get(tag)) {
filterDesc.add(genExprNodeDesc(cond, inputRS));
}
}
exprMap.put(tag, keyDesc);
filterMap.put(tag, filterDesc);
rightOps[pos] = input;
}
JoinCondDesc[] joinCondns = new JoinCondDesc[join.getJoinCond().length];
for (int i = 0; i < join.getJoinCond().length; i++) {
JoinCond condn = join.getJoinCond()[i];
joinCondns[i] = new JoinCondDesc(condn);
}
JoinDesc desc = new JoinDesc(exprMap, outputColumnNames,
join.getNoOuterJoin(), joinCondns, filterMap);
desc.setReversedExprs(reversedExprs);
JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(desc,
new RowSchema(outputRS.getColumnInfos()), rightOps);
joinOp.setColumnExprMap(colExprMap);
joinOp.setPosToAliasMap(posToAliasMap);
return putOpInsertMap(joinOp, outputRS);
}
@SuppressWarnings("nls")
private Operator genJoinReduceSinkChild(QB qb, QBJoinTree joinTree,
Operator child, String srcName, int pos) throws SemanticException {
RowResolver inputRS = opParseCtx.get(child).getRowResolver();
RowResolver outputRS = new RowResolver();
ArrayList<String> outputColumns = new ArrayList<String>();
ArrayList<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>();
// Compute join keys and store in reduceKeys
ArrayList<ASTNode> exprs = joinTree.getExpressions().get(pos);
for (int i = 0; i < exprs.size(); i++) {
ASTNode expr = exprs.get(i);
reduceKeys.add(genExprNodeDesc(expr, inputRS));
}
// Walk over the input row resolver and copy in the output
ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
Iterator<String> tblNamesIter = inputRS.getTableNames().iterator();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
while (tblNamesIter.hasNext()) {
String src = tblNamesIter.next();
HashMap<String, ColumnInfo> fMap = inputRS.getFieldMap(src);
for (Map.Entry<String, ColumnInfo> entry : fMap.entrySet()) {
String field = entry.getKey();
ColumnInfo valueInfo = entry.getValue();
ExprNodeColumnDesc inputExpr = new ExprNodeColumnDesc(valueInfo
.getType(), valueInfo.getInternalName(), valueInfo.getTabAlias(),
valueInfo.getIsVirtualCol());
reduceValues.add(inputExpr);
if (outputRS.get(src, field) == null) {
String col = getColumnInternalName(reduceValues.size() - 1);
outputColumns.add(col);
ColumnInfo newColInfo = new ColumnInfo(Utilities.ReduceField.VALUE
.toString()
+ "." + col, valueInfo.getType(), src, valueInfo
.getIsVirtualCol(), valueInfo.isHiddenVirtualCol());
colExprMap.put(newColInfo.getInternalName(), inputExpr);
outputRS.put(src, field, newColInfo);
}
}
}
int numReds = -1;
// Use only 1 reducer in case of cartesian product
if (reduceKeys.size() == 0) {
numReds = 1;
// Cartesian product is not supported in strict mode
if (conf.getVar(HiveConf.ConfVars.HIVEMAPREDMODE).equalsIgnoreCase(
"strict")) {
throw new SemanticException(ErrorMsg.NO_CARTESIAN_PRODUCT.getMsg());
}
}
ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(
OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys,
reduceValues, outputColumns, false, joinTree.getNextTag(),
reduceKeys.size(), numReds), new RowSchema(outputRS
.getColumnInfos()), child), outputRS);
rsOp.setColumnExprMap(colExprMap);
return rsOp;
}
private Operator genJoinOperator(QB qb, QBJoinTree joinTree,
HashMap<String, Operator> map) throws SemanticException {
QBJoinTree leftChild = joinTree.getJoinSrc();
Operator joinSrcOp = null;
if (leftChild != null) {
Operator joinOp = genJoinOperator(qb, leftChild, map);
ArrayList<ASTNode> filter = joinTree.getFiltersForPushing().get(0);
for (ASTNode cond : filter) {
joinOp = genFilterPlan(qb, cond, joinOp);
}
joinSrcOp = genJoinReduceSinkChild(qb, joinTree, joinOp, null, 0);
}
Operator[] srcOps = new Operator[joinTree.getBaseSrc().length];
HashSet<Integer> omitOpts = null; // set of inputs to the join whose columns
// should be omitted from the join output
int pos = 0;
for (String src : joinTree.getBaseSrc()) {
if (src != null) {
Operator srcOp = map.get(src);
// for left-semi join, generate an additional selection & group-by
// operator before ReduceSink
ArrayList<ASTNode> fields = joinTree.getRHSSemijoinColumns(src);
if (fields != null) {
// the RHS table columns should not be output from the join
if (omitOpts == null) {
omitOpts = new HashSet<Integer>();
}
omitOpts.add(pos);
// generate a selection operator for group-by keys only
srcOp = insertSelectForSemijoin(fields, srcOp);
// generate a groupby operator (HASH mode) for a map-side partial
// aggregation for semijoin
srcOp = genMapGroupByForSemijoin(qb, fields, srcOp,
GroupByDesc.Mode.HASH);
}
// generate a ReduceSink operator for the join
srcOps[pos] = genJoinReduceSinkChild(qb, joinTree, srcOp, src, pos);
pos++;
} else {
assert pos == 0;
srcOps[pos++] = null;
}
}
// Type checking and implicit type conversion for join keys
genJoinOperatorTypeCheck(joinSrcOp, srcOps);
JoinOperator joinOp = (JoinOperator) genJoinOperatorChildren(joinTree,
joinSrcOp, srcOps, omitOpts);
joinContext.put(joinOp, joinTree);
return joinOp;
}
/**
* Construct a selection operator for semijoin that filters out all fields
* other than the group by keys.
*
* @param fields
* list of fields that need to be output
* @param input
* input operator
* @return the selection operator.
* @throws SemanticException
*/
private Operator insertSelectForSemijoin(ArrayList<ASTNode> fields,
Operator input) throws SemanticException {
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
ArrayList<String> columnNames = new ArrayList<String>();
// construct the list of columns that need to be projected
for (ASTNode field : fields) {
ExprNodeColumnDesc exprNode = (ExprNodeColumnDesc) genExprNodeDesc(field,
inputRR);
colList.add(exprNode);
columnNames.add(exprNode.getColumn());
}
// create selection operator
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
new SelectDesc(colList, columnNames, false), new RowSchema(inputRR
.getColumnInfos()), input), inputRR);
output.setColumnExprMap(input.getColumnExprMap());
return output;
}
private Operator genMapGroupByForSemijoin(QB qb,
ArrayList<ASTNode> fields, // the ASTNodes of the join keys, e.g. "tab.col"
Operator inputOperatorInfo, GroupByDesc.Mode mode)
throws SemanticException {
RowResolver groupByInputRowResolver = opParseCtx.get(inputOperatorInfo)
.getRowResolver();
RowResolver groupByOutputRowResolver = new RowResolver();
ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
ArrayList<String> outputColumnNames = new ArrayList<String>();
ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
qb.getParseInfo();
// join keys should only be columns, not expressions
groupByOutputRowResolver.setIsExprResolver(true);
for (int i = 0; i < fields.size(); ++i) {
// get the group by keys to ColumnInfo
ASTNode colName = fields.get(i);
ExprNodeDesc grpByExprNode = genExprNodeDesc(colName,
groupByInputRowResolver);
groupByKeys.add(grpByExprNode);
// generate output column names
String field = getColumnInternalName(i);
outputColumnNames.add(field);
ColumnInfo colInfo2 = new ColumnInfo(field, grpByExprNode.getTypeInfo(),
"", false);
groupByOutputRowResolver.putExpression(colName, colInfo2);
// establish mapping from the output column to the input column
colExprMap.put(field, grpByExprNode);
}
// Generate group-by operator
float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
false,groupByMemoryUsage,memoryThreshold), new RowSchema(groupByOutputRowResolver.getColumnInfos()),
inputOperatorInfo), groupByOutputRowResolver);
op.setColumnExprMap(colExprMap);
return op;
}
private void genJoinOperatorTypeCheck(Operator left, Operator[] right)
throws SemanticException {
// keys[i] -> ArrayList<exprNodeDesc> for the i-th join operator key list
ArrayList<ArrayList<ExprNodeDesc>> keys = new ArrayList<ArrayList<ExprNodeDesc>>();
int keyLength = 0;
for (int i = 0; i < right.length; i++) {
Operator oi = (i == 0 && right[i] == null ? left : right[i]);
ReduceSinkDesc now = ((ReduceSinkOperator) (oi)).getConf();
if (i == 0) {
keyLength = now.getKeyCols().size();
} else {
assert (keyLength == now.getKeyCols().size());
}
keys.add(now.getKeyCols());
}
// implicit type conversion hierarchy
for (int k = 0; k < keyLength; k++) {
// Find the common class for type conversion
TypeInfo commonType = keys.get(0).get(k).getTypeInfo();
for (int i = 1; i < right.length; i++) {
TypeInfo a = commonType;
TypeInfo b = keys.get(i).get(k).getTypeInfo();
commonType = FunctionRegistry.getCommonClassForComparison(a, b);
if (commonType == null) {
throw new SemanticException(
"Cannot do equality join on different types: " + a.getTypeName()
+ " and " + b.getTypeName());
}
}
// Add implicit type conversion if necessary
for (int i = 0; i < right.length; i++) {
if (!commonType.equals(keys.get(i).get(k).getTypeInfo())) {
keys.get(i).set(
k,
TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc(
commonType.getTypeName(), keys.get(i).get(k)));
}
}
}
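// Illustrative example: joining an int key with a double key converts the
// int side to double, the common comparison type chosen by
// FunctionRegistry.getCommonClassForComparison.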
// regenerate keySerializationInfo because the ReduceSinkOperator's
// output key types might have changed.
for (int i = 0; i < right.length; i++) {
Operator oi = (i == 0 && right[i] == null ? left : right[i]);
ReduceSinkDesc now = ((ReduceSinkOperator) (oi)).getConf();
now.setKeySerializeInfo(PlanUtils.getReduceKeyTableDesc(PlanUtils
.getFieldSchemasFromColumnList(now.getKeyCols(), "joinkey"), now
.getOrder()));
}
}
private Operator genJoinPlan(QB qb, HashMap<String, Operator> map)
throws SemanticException {
QBJoinTree joinTree = qb.getQbJoinTree();
Operator joinOp = genJoinOperator(qb, joinTree, map);
return joinOp;
}
/**
* Extract the filters from the join condition and push them on top of the
* source operators. This procedure traverses the query tree recursively.
*/
private void pushJoinFilters(QB qb, QBJoinTree joinTree,
HashMap<String, Operator> map) throws SemanticException {
if (joinTree.getJoinSrc() != null) {
pushJoinFilters(qb, joinTree.getJoinSrc(), map);
}
ArrayList<ArrayList<ASTNode>> filters = joinTree.getFiltersForPushing();
int pos = 0;
for (String src : joinTree.getBaseSrc()) {
if (src != null) {
Operator srcOp = map.get(src);
ArrayList<ASTNode> filter = filters.get(pos);
for (ASTNode cond : filter) {
srcOp = genFilterPlan(qb, cond, srcOp);
}
map.put(src, srcOp);
}
pos++;
}
}
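// Collect the table aliases named in MAPJOIN hints; e.g. a hint of
// MAPJOIN(b) adds "b" to the returned list.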
private List<String> getMapSideJoinTables(QB qb) {
List<String> cols = new ArrayList<String>();
ASTNode hints = qb.getParseInfo().getHints();
for (int pos = 0; pos < hints.getChildCount(); pos++) {
ASTNode hint = (ASTNode) hints.getChild(pos);
if (((ASTNode) hint.getChild(0)).getToken().getType() == HiveParser.TOK_MAPJOIN) {
ASTNode hintTblNames = (ASTNode) hint.getChild(1);
int numCh = hintTblNames.getChildCount();
for (int tblPos = 0; tblPos < numCh; tblPos++) {
String tblName = ((ASTNode) hintTblNames.getChild(tblPos)).getText()
.toLowerCase();
if (!cols.contains(tblName)) {
cols.add(tblName);
}
}
}
}
return cols;
}
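/**
* Build the join tree for a UNIQUEJOIN clause: every TOK_TABREF child
* contributes a table alias (the first becomes the left alias), every
* TOK_EXPLIST child must supply the same number of join keys, and a
* preceding PRESERVE keyword marks that table's rows as preserved.
*/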
private QBJoinTree genUniqueJoinTree(QB qb, ASTNode joinParseTree)
throws SemanticException {
QBJoinTree joinTree = new QBJoinTree();
joinTree.setNoOuterJoin(false);
joinTree.setExpressions(new ArrayList<ArrayList<ASTNode>>());
joinTree.setFilters(new ArrayList<ArrayList<ASTNode>>());
joinTree.setFiltersForPushing(new ArrayList<ArrayList<ASTNode>>());
// Create joinTree structures to fill them up later
ArrayList<String> rightAliases = new ArrayList<String>();
ArrayList<String> leftAliases = new ArrayList<String>();
ArrayList<String> baseSrc = new ArrayList<String>();
ArrayList<Boolean> preserved = new ArrayList<Boolean>();
boolean lastPreserved = false;
int cols = -1;
for (int i = 0; i < joinParseTree.getChildCount(); i++) {
ASTNode child = (ASTNode) joinParseTree.getChild(i);
switch (child.getToken().getType()) {
case HiveParser.TOK_TABREF:
// Handle a table - populate aliases appropriately:
// leftAliases should contain the first table, rightAliases should
// contain all other tables and baseSrc should contain all tables
String tableName = unescapeIdentifier(child.getChild(0).getText());
String alias = child.getChildCount() == 1 ? tableName
: unescapeIdentifier(child.getChild(child.getChildCount() - 1)
.getText().toLowerCase());
if (i == 0) {
leftAliases.add(alias);
joinTree.setLeftAlias(alias);
} else {
rightAliases.add(alias);
}
baseSrc.add(alias);
preserved.add(lastPreserved);
lastPreserved = false;
break;
case HiveParser.TOK_EXPLIST:
if (cols == -1 && child.getChildCount() != 0) {
cols = child.getChildCount();
} else if (child.getChildCount() != cols) {
throw new SemanticException("Tables with different or invalid "
+ "number of keys in UNIQUEJOIN");
}
ArrayList<ASTNode> expressions = new ArrayList<ASTNode>();
ArrayList<ASTNode> filt = new ArrayList<ASTNode>();
ArrayList<ASTNode> filters = new ArrayList<ASTNode>();
for (Node exp : child.getChildren()) {
expressions.add((ASTNode) exp);
}
joinTree.getExpressions().add(expressions);
joinTree.getFilters().add(filt);
joinTree.getFiltersForPushing().add(filters);
break;
case HiveParser.KW_PRESERVE:
lastPreserved = true;
break;
case HiveParser.TOK_SUBQUERY:
throw new SemanticException(
"Subqueries are not supported in UNIQUEJOIN");
default:
throw new SemanticException("Unexpected UNIQUEJOIN structure");
}
}
joinTree.setBaseSrc(baseSrc.toArray(new String[0]));
joinTree.setLeftAliases(leftAliases.toArray(new String[0]));
joinTree.setRightAliases(rightAliases.toArray(new String[0]));
JoinCond[] condn = new JoinCond[preserved.size()];
for (int i = 0; i < condn.length; i++) {
condn[i] = new JoinCond(preserved.get(i));
}
joinTree.setJoinCond(condn);
if (qb.getParseInfo().getHints() != null) {
parseStreamTables(joinTree, qb);
}
return joinTree;
}
private QBJoinTree genJoinTree(QB qb, ASTNode joinParseTree)
throws SemanticException {
QBJoinTree joinTree = new QBJoinTree();
JoinCond[] condn = new JoinCond[1];
switch (joinParseTree.getToken().getType()) {
case HiveParser.TOK_LEFTOUTERJOIN:
joinTree.setNoOuterJoin(false);
condn[0] = new JoinCond(0, 1, JoinType.LEFTOUTER);
break;
case HiveParser.TOK_RIGHTOUTERJOIN:
joinTree.setNoOuterJoin(false);
condn[0] = new JoinCond(0, 1, JoinType.RIGHTOUTER);
break;
case HiveParser.TOK_FULLOUTERJOIN:
joinTree.setNoOuterJoin(false);
condn[0] = new JoinCond(0, 1, JoinType.FULLOUTER);
break;
case HiveParser.TOK_LEFTSEMIJOIN:
joinTree.setNoSemiJoin(false);
condn[0] = new JoinCond(0, 1, JoinType.LEFTSEMI);
break;
default:
condn[0] = new JoinCond(0, 1, JoinType.INNER);
joinTree.setNoOuterJoin(true);
break;
}
joinTree.setJoinCond(condn);
ASTNode left = (ASTNode) joinParseTree.getChild(0);
ASTNode right = (ASTNode) joinParseTree.getChild(1);
if ((left.getToken().getType() == HiveParser.TOK_TABREF)
|| (left.getToken().getType() == HiveParser.TOK_SUBQUERY)) {
String table_name = unescapeIdentifier(left.getChild(0).getText());
String alias = left.getChildCount() == 1 ? table_name
: unescapeIdentifier(left.getChild(left.getChildCount() - 1)
.getText().toLowerCase());
joinTree.setLeftAlias(alias);
String[] leftAliases = new String[1];
leftAliases[0] = alias;
joinTree.setLeftAliases(leftAliases);
String[] children = new String[2];
children[0] = alias;
joinTree.setBaseSrc(children);
} else if (isJoinToken(left)) {
QBJoinTree leftTree = genJoinTree(qb, left);
joinTree.setJoinSrc(leftTree);
String[] leftChildAliases = leftTree.getLeftAliases();
String leftAliases[] = new String[leftChildAliases.length + 1];
for (int i = 0; i < leftChildAliases.length; i++) {
leftAliases[i] = leftChildAliases[i];
}
leftAliases[leftChildAliases.length] = leftTree.getRightAliases()[0];
joinTree.setLeftAliases(leftAliases);
} else {
assert (false);
}
if ((right.getToken().getType() == HiveParser.TOK_TABREF)
|| (right.getToken().getType() == HiveParser.TOK_SUBQUERY)) {
String tableName = unescapeIdentifier(right.getChild(0).getText());
String alias = right.getChildCount() == 1 ? tableName
: unescapeIdentifier(right.getChild(right.getChildCount() - 1)
.getText().toLowerCase());
String[] rightAliases = new String[1];
rightAliases[0] = alias;
joinTree.setRightAliases(rightAliases);
String[] children = joinTree.getBaseSrc();
if (children == null) {
children = new String[2];
}
children[1] = alias;
joinTree.setBaseSrc(children);
// remember rhs table for semijoin
if (joinTree.getNoSemiJoin() == false) {
joinTree.addRHSSemijoin(alias);
}
} else {
assert false;
}
ArrayList<ArrayList<ASTNode>> expressions = new ArrayList<ArrayList<ASTNode>>();
expressions.add(new ArrayList<ASTNode>());
expressions.add(new ArrayList<ASTNode>());
joinTree.setExpressions(expressions);
ArrayList<ArrayList<ASTNode>> filters = new ArrayList<ArrayList<ASTNode>>();
filters.add(new ArrayList<ASTNode>());
filters.add(new ArrayList<ASTNode>());
joinTree.setFilters(filters);
ArrayList<ArrayList<ASTNode>> filtersForPushing =
new ArrayList<ArrayList<ASTNode>>();
filtersForPushing.add(new ArrayList<ASTNode>());
filtersForPushing.add(new ArrayList<ASTNode>());
joinTree.setFiltersForPushing(filtersForPushing);
ASTNode joinCond = (ASTNode) joinParseTree.getChild(2);
ArrayList<String> leftSrc = new ArrayList<String>();
parseJoinCondition(joinTree, joinCond, leftSrc);
if (leftSrc.size() == 1) {
joinTree.setLeftAlias(leftSrc.get(0));
}
// check the hints to see if the user has specified a map-side join. This
// will be removed later on, once the cost-based
// infrastructure is in place
if (qb.getParseInfo().getHints() != null) {
List<String> mapSideTables = getMapSideJoinTables(qb);
List<String> mapAliases = joinTree.getMapAliases();
for (String mapTbl : mapSideTables) {
boolean mapTable = false;
for (String leftAlias : joinTree.getLeftAliases()) {
if (mapTbl.equalsIgnoreCase(leftAlias)) {
mapTable = true;
}
}
for (String rightAlias : joinTree.getRightAliases()) {
if (mapTbl.equalsIgnoreCase(rightAlias)) {
mapTable = true;
}
}
if (mapTable) {
if (mapAliases == null) {
mapAliases = new ArrayList<String>();
}
mapAliases.add(mapTbl);
joinTree.setMapSideJoin(true);
}
}
joinTree.setMapAliases(mapAliases);
parseStreamTables(joinTree, qb);
}
return joinTree;
}
private void parseStreamTables(QBJoinTree joinTree, QB qb) {
List<String> streamAliases = joinTree.getStreamAliases();
for (Node hintNode : qb.getParseInfo().getHints().getChildren()) {
ASTNode hint = (ASTNode) hintNode;
if (hint.getChild(0).getType() == HiveParser.TOK_STREAMTABLE) {
for (int i = 0; i < hint.getChild(1).getChildCount(); i++) {
if (streamAliases == null) {
streamAliases = new ArrayList<String>();
}
streamAliases.add(hint.getChild(1).getChild(i).getText());
}
}
}
joinTree.setStreamAliases(streamAliases);
}
private void mergeJoins(QB qb, QBJoinTree parent, QBJoinTree node,
QBJoinTree target, int pos) {
String[] nodeRightAliases = node.getRightAliases();
String[] trgtRightAliases = target.getRightAliases();
String[] rightAliases = new String[nodeRightAliases.length
+ trgtRightAliases.length];
for (int i = 0; i < trgtRightAliases.length; i++) {
rightAliases[i] = trgtRightAliases[i];
}
for (int i = 0; i < nodeRightAliases.length; i++) {
rightAliases[i + trgtRightAliases.length] = nodeRightAliases[i];
}
target.setRightAliases(rightAliases);
String[] nodeBaseSrc = node.getBaseSrc();
String[] trgtBaseSrc = target.getBaseSrc();
String[] baseSrc = new String[nodeBaseSrc.length + trgtBaseSrc.length - 1];
for (int i = 0; i < trgtBaseSrc.length; i++) {
baseSrc[i] = trgtBaseSrc[i];
}
for (int i = 1; i < nodeBaseSrc.length; i++) {
baseSrc[i + trgtBaseSrc.length - 1] = nodeBaseSrc[i];
}
target.setBaseSrc(baseSrc);
ArrayList<ArrayList<ASTNode>> expr = target.getExpressions();
for (int i = 0; i < nodeRightAliases.length; i++) {
expr.add(node.getExpressions().get(i + 1));
}
ArrayList<ArrayList<ASTNode>> filters = target.getFilters();
for (int i = 0; i < nodeRightAliases.length; i++) {
filters.add(node.getFilters().get(i + 1));
}
ArrayList<ArrayList<ASTNode>> filter = target.getFiltersForPushing();
for (int i = 0; i < nodeRightAliases.length; i++) {
filter.add(node.getFiltersForPushing().get(i + 1));
}
if (node.getFiltersForPushing().get(0).size() != 0) {
ArrayList<ASTNode> filterPos = filter.get(pos);
filterPos.addAll(node.getFiltersForPushing().get(0));
}
if (qb.getQbJoinTree() == node) {
qb.setQbJoinTree(node.getJoinSrc());
} else {
parent.setJoinSrc(node.getJoinSrc());
}
if (node.getNoOuterJoin() && target.getNoOuterJoin()) {
target.setNoOuterJoin(true);
} else {
target.setNoOuterJoin(false);
}
if (node.getNoSemiJoin() && target.getNoSemiJoin()) {
target.setNoSemiJoin(true);
} else {
target.setNoSemiJoin(false);
}
target.mergeRHSSemijoin(node);
JoinCond[] nodeCondns = node.getJoinCond();
int nodeCondnsSize = nodeCondns.length;
JoinCond[] targetCondns = target.getJoinCond();
int targetCondnsSize = targetCondns.length;
JoinCond[] newCondns = new JoinCond[nodeCondnsSize + targetCondnsSize];
for (int i = 0; i < targetCondnsSize; i++) {
newCondns[i] = targetCondns[i];
}
for (int i = 0; i < nodeCondnsSize; i++) {
JoinCond nodeCondn = nodeCondns[i];
if (nodeCondn.getLeft() == 0) {
nodeCondn.setLeft(pos);
} else {
nodeCondn.setLeft(nodeCondn.getLeft() + targetCondnsSize);
}
nodeCondn.setRight(nodeCondn.getRight() + targetCondnsSize);
newCondns[targetCondnsSize + i] = nodeCondn;
}
target.setJoinCond(newCondns);
if (target.isMapSideJoin()) {
assert node.isMapSideJoin();
List<String> mapAliases = target.getMapAliases();
for (String mapTbl : node.getMapAliases()) {
if (!mapAliases.contains(mapTbl)) {
mapAliases.add(mapTbl);
}
}
target.setMapAliases(mapAliases);
}
}
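/**
 * Find the position in 'target' whose join expressions match the left-side
 * join expressions of 'node': 0 for the left alias, i + 1 for the i-th
 * right alias, or -1 if no merge is possible.
 */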
private int findMergePos(QBJoinTree node, QBJoinTree target) {
int res = -1;
String leftAlias = node.getLeftAlias();
if (leftAlias == null) {
return -1;
}
ArrayList<ASTNode> nodeCondn = node.getExpressions().get(0);
ArrayList<ASTNode> targetCondn = null;
if (leftAlias.equals(target.getLeftAlias())) {
targetCondn = target.getExpressions().get(0);
res = 0;
} else {
for (int i = 0; i < target.getRightAliases().length; i++) {
if (leftAlias.equals(target.getRightAliases()[i])) {
targetCondn = target.getExpressions().get(i + 1);
res = i + 1;
break;
}
}
}
if ((targetCondn == null) || (nodeCondn.size() != targetCondn.size())) {
return -1;
}
for (int i = 0; i < nodeCondn.size(); i++) {
if (!nodeCondn.get(i).toStringTree().equals(
targetCondn.get(i).toStringTree())) {
return -1;
}
}
return res;
}
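/**
 * Walk the chain of join trees starting at 'target' and merge 'node' into
 * the first one with matching join keys. Returns true if a merge happened.
 */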
private boolean mergeJoinNodes(QB qb, QBJoinTree parent, QBJoinTree node,
QBJoinTree target) {
if (target == null) {
return false;
}
int res = findMergePos(node, target);
if (res != -1) {
mergeJoins(qb, parent, node, target, res);
return true;
}
return mergeJoinNodes(qb, parent, node, target.getJoinSrc());
}
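/**
 * Merge adjacent join trees of the query block that join on the same keys,
 * so that a chain of binary joins can be executed as a single multi-way
 * join.
 */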
private void mergeJoinTree(QB qb) {
QBJoinTree root = qb.getQbJoinTree();
QBJoinTree parent = null;
while (root != null) {
boolean merged = mergeJoinNodes(qb, parent, root, root.getJoinSrc());
if (parent == null) {
if (merged) {
root = qb.getQbJoinTree();
} else {
parent = root;
root = root.getJoinSrc();
}
} else {
parent = parent.getJoinSrc();
root = parent.getJoinSrc();
}
}
}
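/**
 * Insert a pass-through select operator over all columns of the input.
 * The ColumnPruner later trims it so that less data is shuffled before the
 * group-by.
 */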
private Operator insertSelectAllPlanForGroupBy(String dest, Operator input)
throws SemanticException {
OpParseContext inputCtx = opParseCtx.get(input);
RowResolver inputRR = inputCtx.getRowResolver();
ArrayList<ColumnInfo> columns = inputRR.getColumnInfos();
ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
ArrayList<String> columnNames = new ArrayList<String>();
for (int i = 0; i < columns.size(); i++) {
ColumnInfo col = columns.get(i);
colList.add(new ExprNodeColumnDesc(col.getType(), col.getInternalName(),
col.getTabAlias(), col.getIsVirtualCol()));
columnNames.add(col.getInternalName());
}
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
new SelectDesc(colList, columnNames, true), new RowSchema(inputRR
.getColumnInfos()), input), inputRR);
output.setColumnExprMap(input.getColumnExprMap());
return output;
}
// Return the common distinct expression
// There should be more than 1 destination, with group bys in all of them.
private List<ASTNode> getCommonDistinctExprs(QB qb, Operator input) {
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
QBParseInfo qbp = qb.getParseInfo();
TreeSet<String> ks = new TreeSet<String>();
ks.addAll(qbp.getClauseNames());
// Go over all the destination tables
if (ks.size() <= 1) {
return null;
}
List<ExprNodeDesc> oldList = null;
List<ASTNode> oldASTList = null;
for (String dest : ks) {
// If a filter is present, common processing is not possible
if (qbp.getWhrForClause(dest) != null) {
return null;
}
if (qbp.getAggregationExprsForClause(dest).size() == 0
&& getGroupByForClause(qbp, dest).size() == 0) {
return null;
}
// All distinct expressions must be the same
List<ASTNode> list = qbp.getDistinctFuncExprsForClause(dest);
if (list.isEmpty()) {
return null;
}
List<ExprNodeDesc> currDestList = new ArrayList<ExprNodeDesc>();
List<ASTNode> currASTList = new ArrayList<ASTNode>();
for (ASTNode value: list) {
try {
// 0 is function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode parameter = (ASTNode) value.getChild(i);
currDestList.add(genExprNodeDesc(parameter, inputRR));
currASTList.add(parameter);
}
} catch (SemanticException e) {
return null;
}
if (oldList == null) {
oldList = currDestList;
oldASTList = currASTList;
} else {
if (oldList.size() != currDestList.size()) {
return null;
}
for (int pos = 0; pos < oldList.size(); pos++) {
if (!oldList.get(pos).isSame(currDestList.get(pos))) {
return null;
}
}
}
}
}
return oldASTList;
}
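/**
 * Create a single reduce sink keyed on the distinct expressions common to
 * all destinations; the grouping keys and aggregation parameters of every
 * destination are forwarded as values.
 */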
private Operator createCommonReduceSink(QB qb, Operator input)
throws SemanticException {
// Go over all the tables and extract the common distinct key
List<ASTNode> distExprs = getCommonDistinctExprs(qb, input);
QBParseInfo qbp = qb.getParseInfo();
TreeSet<String> ks = new TreeSet<String>();
ks.addAll(qbp.getClauseNames());
// Pass the entire row
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
RowResolver reduceSinkOutputRowResolver = new RowResolver();
reduceSinkOutputRowResolver.setIsExprResolver(true);
ArrayList<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>();
ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
// Pre-compute distinct group-by keys and store in reduceKeys
List<String> outputColumnNames = new ArrayList<String>();
for (ASTNode distn : distExprs) {
ExprNodeDesc distExpr = genExprNodeDesc(distn, inputRR);
reduceKeys.add(distExpr);
if (reduceSinkOutputRowResolver.getExpression(distn) == null) {
outputColumnNames.add(getColumnInternalName(reduceKeys.size() - 1));
String field = Utilities.ReduceField.KEY.toString() + "."
+ getColumnInternalName(reduceKeys.size() - 1);
ColumnInfo colInfo = new ColumnInfo(field, reduceKeys.get(
reduceKeys.size() - 1).getTypeInfo(), "", false);
reduceSinkOutputRowResolver.putExpression(distn, colInfo);
colExprMap.put(colInfo.getInternalName(), distExpr);
}
}
// Go over all the grouping keys and aggregations
for (String dest : ks) {
List<ASTNode> grpByExprs = getGroupByForClause(qbp, dest);
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
if (reduceSinkOutputRowResolver.getExpression(grpbyExpr) == null) {
ExprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr, inputRR);
reduceValues.add(grpByExprNode);
String field = Utilities.ReduceField.VALUE.toString() + "."
+ getColumnInternalName(reduceValues.size() - 1);
ColumnInfo colInfo = new ColumnInfo(field, reduceValues.get(
reduceValues.size() - 1).getTypeInfo(), "", false);
reduceSinkOutputRowResolver.putExpression(grpbyExpr, colInfo);
outputColumnNames.add(getColumnInternalName(reduceValues.size() - 1));
}
}
// For each aggregation
HashMap<String, ASTNode> aggregationTrees = qbp
.getAggregationExprsForClause(dest);
assert (aggregationTrees != null);
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
value.getChild(0).getText();
// 0 is the function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode paraExpr = (ASTNode) value.getChild(i);
if (reduceSinkOutputRowResolver.getExpression(paraExpr) == null) {
ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, inputRR);
reduceValues.add(paraExprNode);
String field = Utilities.ReduceField.VALUE.toString() + "."
+ getColumnInternalName(reduceValues.size() - 1);
ColumnInfo colInfo = new ColumnInfo(field, reduceValues.get(
reduceValues.size() - 1).getTypeInfo(), "", false);
reduceSinkOutputRowResolver.putExpression(paraExpr, colInfo);
outputColumnNames
.add(getColumnInternalName(reduceValues.size() - 1));
}
}
}
}
ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(
OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys,
reduceValues, outputColumnNames, true, -1, reduceKeys.size(), -1),
new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), input),
reduceSinkOutputRowResolver);
rsOp.setColumnExprMap(colExprMap);
return rsOp;
}
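/**
 * Generate the operator plan for the body of the query block: filter,
 * group-by, having, select, cluster/distribute/order/sort-by, limit and
 * file sink for each destination. If all destinations share the same
 * distinct expressions, the common-distinct multi-group-by plan is
 * generated instead.
 */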
@SuppressWarnings("nls")
private Operator genBodyPlan(QB qb, Operator input) throws SemanticException {
QBParseInfo qbp = qb.getParseInfo();
TreeSet<String> ks = new TreeSet<String>(qbp.getClauseNames());
// For a multi-group-by query with the same distinct expressions, we
// currently ignore all user hints: regardless of whether map-side
// aggregation was requested, it is turned off.
boolean optimizeMultiGroupBy = (getCommonDistinctExprs(qb, input) != null);
Operator curr = input;
// If there are multiple group-bys, map-side aggregation is turned off,
// there are no filters, and there is a single distinct, optimize that case:
// spray initially by the distinct key (no computation at the mapper),
// run multiple group-by operators at the reducer, and then proceed.
if (optimizeMultiGroupBy) {
curr = createCommonReduceSink(qb, input);
RowResolver currRR = opParseCtx.get(curr).getRowResolver();
// create a forward operator
input = putOpInsertMap(OperatorFactory.getAndMakeChild(new ForwardDesc(),
new RowSchema(currRR.getColumnInfos()), curr), currRR);
for (String dest : ks) {
curr = input;
curr = genGroupByPlan2MRMultiGroupBy(dest, qb, curr);
curr = genSelectPlan(dest, qb, curr);
Integer limit = qbp.getDestLimit(dest);
if (limit != null) {
curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(), true);
qb.getParseInfo().setOuterQueryLimit(limit.intValue());
}
curr = genFileSinkPlan(dest, qb, curr);
}
} else {
// Go over all the destination tables
for (String dest : ks) {
curr = input;
if (qbp.getWhrForClause(dest) != null) {
curr = genFilterPlan(dest, qb, curr);
}
if (qbp.getAggregationExprsForClause(dest).size() != 0
|| getGroupByForClause(qbp, dest).size() > 0) {
//multiple distincts is not supported with skew in data
if (conf.getVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)
.equalsIgnoreCase("true") &&
qbp.getDistinctFuncExprsForClause(dest).size() > 1) {
throw new SemanticException(ErrorMsg.UNSUPPORTED_MULTIPLE_DISTINCTS.
getMsg());
}
// insert a select operator here used by the ColumnPruner to reduce
// the data to shuffle
curr = insertSelectAllPlanForGroupBy(dest, curr);
if (conf.getVar(HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE)
.equalsIgnoreCase("true")) {
if (conf.getVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)
.equalsIgnoreCase("false")) {
curr = genGroupByPlanMapAggr1MR(dest, qb, curr);
} else {
curr = genGroupByPlanMapAggr2MR(dest, qb, curr);
}
} else if (conf.getVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)
.equalsIgnoreCase("true")) {
curr = genGroupByPlan2MR(dest, qb, curr);
} else {
curr = genGroupByPlan1MR(dest, qb, curr);
}
}
// Insert HAVING plan here
if (qbp.getHavingForClause(dest) != null) {
if (getGroupByForClause(qbp, dest).size() == 0) {
throw new SemanticException("HAVING specified without GROUP BY");
}
curr = genHavingPlan(dest, qb, curr);
}
curr = genSelectPlan(dest, qb, curr);
Integer limit = qbp.getDestLimit(dest);
if (qbp.getClusterByForClause(dest) != null
|| qbp.getDistributeByForClause(dest) != null
|| qbp.getOrderByForClause(dest) != null
|| qbp.getSortByForClause(dest) != null) {
int numReducers = -1;
// Use only 1 reducer if order by is present
if (qbp.getOrderByForClause(dest) != null) {
numReducers = 1;
}
curr = genReduceSinkPlan(dest, qb, curr, numReducers);
}
if (qbp.getIsSubQ()) {
if (limit != null) {
// In case of order by, only 1 reducer is used, so there is no need for
// another shuffle
curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(), qbp
.getOrderByForClause(dest) != null ? false : true);
}
} else {
curr = genConversionOps(dest, qb, curr);
// exact limit can be taken care of by the fetch operator
if (limit != null) {
boolean extraMRStep = true;
if (qb.getIsQuery() && qbp.getClusterByForClause(dest) == null
&& qbp.getSortByForClause(dest) == null) {
extraMRStep = false;
}
curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(),
extraMRStep);
qb.getParseInfo().setOuterQueryLimit(limit.intValue());
}
curr = genFileSinkPlan(dest, qb, curr);
}
// change the current operator's row resolver tab aliases to the query
// alias if it exists
if (qb.getParseInfo().getAlias() != null) {
RowResolver rr = opParseCtx.get(curr).getRowResolver();
RowResolver newRR = new RowResolver();
String alias = qb.getParseInfo().getAlias();
for (ColumnInfo colInfo : rr.getColumnInfos()) {
String name = colInfo.getInternalName();
String[] tmp = rr.reverseLookup(name);
newRR.put(alias, tmp[1], colInfo);
}
opParseCtx.get(curr).setRowResolver(newRR);
}
}
}
if (LOG.isDebugEnabled()) {
LOG.debug("Created Body Plan for Query Block " + qb.getId());
}
return curr;
}
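/**
 * Generate the union of leftOp and rightOp after verifying that both sides
 * have identical schemas. If one of the children is already a union
 * operator, the other input is merged into it instead of creating a new
 * union operator.
 */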
@SuppressWarnings("nls")
private Operator genUnionPlan(String unionalias, String leftalias,
Operator leftOp, String rightalias, Operator rightOp)
throws SemanticException {
// Currently, the unions are not merged - each union has only 2 parents. So,
// an n-way union will lead to (n-1) union operators.
// These could easily be merged into a single union operator.
RowResolver leftRR = opParseCtx.get(leftOp).getRowResolver();
RowResolver rightRR = opParseCtx.get(rightOp).getRowResolver();
HashMap<String, ColumnInfo> leftmap = leftRR.getFieldMap(leftalias);
HashMap<String, ColumnInfo> rightmap = rightRR.getFieldMap(rightalias);
// make sure the schemas of both sides are the same
if (leftmap.size() != rightmap.size()) {
throw new SemanticException("Schema of both sides of union should match.");
}
for (Map.Entry<String, ColumnInfo> lEntry : leftmap.entrySet()) {
String field = lEntry.getKey();
ColumnInfo lInfo = lEntry.getValue();
ColumnInfo rInfo = rightmap.get(field);
if (rInfo == null) {
throw new SemanticException(
"Schema of both sides of union should match. " + rightalias
+ " does not have the field " + field);
}
if (lInfo == null) {
throw new SemanticException(
"Schema of both sides of union should match. " + leftalias
+ " does not have the field " + field);
}
if (!lInfo.getInternalName().equals(rInfo.getInternalName())) {
throw new SemanticException(
"Schema of both sides of union should match: " + field + ":"
+ lInfo.getInternalName() + " " + rInfo.getInternalName());
}
if (!lInfo.getType().getTypeName().equals(rInfo.getType().getTypeName())) {
throw new SemanticException(
"Schema of both sides of union should match: Column " + field
+ " is of type " + lInfo.getType().getTypeName()
+ " on first table and type " + rInfo.getType().getTypeName()
+ " on second table");
}
}
// construct the forward operator
RowResolver unionoutRR = new RowResolver();
for (Map.Entry<String, ColumnInfo> lEntry : leftmap.entrySet()) {
String field = lEntry.getKey();
ColumnInfo lInfo = lEntry.getValue();
unionoutRR.put(unionalias, field, lInfo);
}
// If one of the children is a union, merge with it
// else create a new one
if ((leftOp instanceof UnionOperator) || (rightOp instanceof UnionOperator)) {
if (leftOp instanceof UnionOperator) {
// make left a child of right
List<Operator<? extends Serializable>> child =
new ArrayList<Operator<? extends Serializable>>();
child.add(leftOp);
rightOp.setChildOperators(child);
List<Operator<? extends Serializable>> parent = leftOp
.getParentOperators();
parent.add(rightOp);
UnionDesc uDesc = ((UnionOperator) leftOp).getConf();
uDesc.setNumInputs(uDesc.getNumInputs() + 1);
return putOpInsertMap(leftOp, unionoutRR);
} else {
// make right a child of left
List<Operator<? extends Serializable>> child =
new ArrayList<Operator<? extends Serializable>>();
child.add(rightOp);
leftOp.setChildOperators(child);
List<Operator<? extends Serializable>> parent = rightOp
.getParentOperators();
parent.add(leftOp);
UnionDesc uDesc = ((UnionOperator) rightOp).getConf();
uDesc.setNumInputs(uDesc.getNumInputs() + 1);
return putOpInsertMap(rightOp, unionoutRR);
}
}
// Create a new union operator
Operator<? extends Serializable> unionforward = OperatorFactory
.getAndMakeChild(new UnionDesc(), new RowSchema(unionoutRR
.getColumnInfos()));
// set union operator as child of each of leftOp and rightOp
List<Operator<? extends Serializable>> child =
new ArrayList<Operator<? extends Serializable>>();
child.add(unionforward);
rightOp.setChildOperators(child);
child = new ArrayList<Operator<? extends Serializable>>();
child.add(unionforward);
leftOp.setChildOperators(child);
List<Operator<? extends Serializable>> parent =
new ArrayList<Operator<? extends Serializable>>();
parent.add(leftOp);
parent.add(rightOp);
unionforward.setParentOperators(parent);
// create operator info list to return
return putOpInsertMap(unionforward, unionoutRR);
}
/**
* Generates the sampling predicate from the TABLESAMPLE clause information.
* This function uses the bucket column list to decide the expression inputs
* to the predicate hash function in case useBucketCols is set to true,
* otherwise the expression list stored in the TableSample is used. The bucket
* columns of the table are used to generate this predicate in case no
* expressions are provided on the TABLESAMPLE clause and the table has
* clustering columns defined in its metadata. The predicate created has the
* following structure:
*
* ((hash(expressions) & Integer.MAX_VALUE) % denominator) == numerator
*
* @param ts
* TABLESAMPLE clause information
* @param bucketCols
* The clustering columns of the table
* @param useBucketCols
* Flag to indicate whether the bucketCols should be used as input to
* the hash function
* @param alias
* The alias used for the table in the row resolver
* @param rwsch
* The row resolver used to resolve column references
* @param qbm
* The metadata information for the query block which is used to
* resolve unaliased columns
* @param planExpr
* The plan tree for the expression. If the user specified this, the
* parse expressions are not used
* @return exprNodeDesc
* @exception SemanticException
*/
private ExprNodeDesc genSamplePredicate(TableSample ts,
List<String> bucketCols, boolean useBucketCols, String alias,
RowResolver rwsch, QBMetaData qbm, ExprNodeDesc planExpr)
throws SemanticException {
ExprNodeDesc numeratorExpr = new ExprNodeConstantDesc(
TypeInfoFactory.intTypeInfo, Integer.valueOf(ts.getNumerator() - 1));
ExprNodeDesc denominatorExpr = new ExprNodeConstantDesc(
TypeInfoFactory.intTypeInfo, Integer.valueOf(ts.getDenominator()));
ExprNodeDesc intMaxExpr = new ExprNodeConstantDesc(
TypeInfoFactory.intTypeInfo, Integer.valueOf(Integer.MAX_VALUE));
ArrayList<ExprNodeDesc> args = new ArrayList<ExprNodeDesc>();
if (planExpr != null) {
args.add(planExpr);
} else if (useBucketCols) {
for (String col : bucketCols) {
ColumnInfo ci = rwsch.get(alias, col);
// TODO: change type to the one in the table schema
args.add(new ExprNodeColumnDesc(ci.getType(), ci.getInternalName(), ci
.getTabAlias(), ci.getIsVirtualCol()));
}
} else {
for (ASTNode expr : ts.getExprs()) {
args.add(genExprNodeDesc(expr, rwsch));
}
}
ExprNodeDesc equalsExpr = null;
{
ExprNodeDesc hashfnExpr = new ExprNodeGenericFuncDesc(
TypeInfoFactory.intTypeInfo, new GenericUDFHash(), args);
assert (hashfnExpr != null);
LOG.info("hashfnExpr = " + hashfnExpr);
ExprNodeDesc andExpr = TypeCheckProcFactory.DefaultExprProcessor
.getFuncExprNodeDesc("&", hashfnExpr, intMaxExpr);
assert (andExpr != null);
LOG.info("andExpr = " + andExpr);
ExprNodeDesc modExpr = TypeCheckProcFactory.DefaultExprProcessor
.getFuncExprNodeDesc("%", andExpr, denominatorExpr);
assert (modExpr != null);
LOG.info("modExpr = " + modExpr);
LOG.info("numeratorExpr = " + numeratorExpr);
equalsExpr = TypeCheckProcFactory.DefaultExprProcessor
.getFuncExprNodeDesc("==", modExpr, numeratorExpr);
LOG.info("equalsExpr = " + equalsExpr);
assert (equalsExpr != null);
}
return equalsExpr;
}
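/**
 * Generate the table scan operator for the given alias, populating the row
 * resolver with the table's columns, partition columns and virtual
 * columns, and adding a sampling filter when a TABLESAMPLE clause (or
 * test-mode sampling) applies.
 */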
@SuppressWarnings("nls")
private Operator genTablePlan(String alias, QB qb) throws SemanticException {
String alias_id = (qb.getId() == null ? alias : qb.getId() + ":" + alias);
Table tab = qb.getMetaData().getSrcForAlias(alias);
RowResolver rwsch;
// is the table already present
Operator<? extends Serializable> top = topOps.get(alias_id);
Operator<? extends Serializable> dummySel = topSelOps.get(alias_id);
if (dummySel != null) {
top = dummySel;
}
if (top == null) {
rwsch = new RowResolver();
try {
StructObjectInspector rowObjectInspector = (StructObjectInspector) tab
.getDeserializer().getObjectInspector();
List<? extends StructField> fields = rowObjectInspector
.getAllStructFieldRefs();
for (int i = 0; i < fields.size(); i++) {
rwsch.put(alias, fields.get(i).getFieldName(), new ColumnInfo(fields
.get(i).getFieldName(), TypeInfoUtils
.getTypeInfoFromObjectInspector(fields.get(i)
.getFieldObjectInspector()), alias, false));
}
} catch (SerDeException e) {
throw new RuntimeException(e);
}
// Hack!! - refactor once the metadata APIs with types are ready
// Finally add the partitioning columns
for (FieldSchema part_col : tab.getPartCols()) {
LOG.trace("Adding partition col: " + part_col);
// TODO: use the right type by calling part_col.getType() instead of
// String.class
rwsch.put(alias, part_col.getName(), new ColumnInfo(part_col.getName(),
TypeInfoFactory.stringTypeInfo, alias, true));
}
// put all virtual columns in the RowResolver.
Iterator<VirtualColumn> vcs = VirtualColumn.registry.values().iterator();
// use a list for easy customization
List<VirtualColumn> vcList = new ArrayList<VirtualColumn>();
while (vcs.hasNext()) {
VirtualColumn vc = vcs.next();
rwsch.put(alias, vc.getName(), new ColumnInfo(vc.getName(),
vc.getTypeInfo(), alias, true, vc.getIsHidden()));
vcList.add(vc);
}
// Create the root of the operator tree
TableScanDesc tsDesc = new TableScanDesc(alias, vcList);
setupStats(tsDesc, qb.getParseInfo(), tab, alias);
top = putOpInsertMap(OperatorFactory.get(tsDesc,
new RowSchema(rwsch.getColumnInfos())), rwsch);
// Add this to the list of top operators - we always start from a table
// scan
topOps.put(alias_id, top);
// Add a mapping from the table scan operator to Table
topToTable.put((TableScanOperator) top, tab);
} else {
rwsch = opParseCtx.get(top).getRowResolver();
top.setChildOperators(null);
}
// check if this table is sampled and needs more than input pruning
Operator<? extends Serializable> tableOp = top;
TableSample ts = qb.getParseInfo().getTabSample(alias);
if (ts != null) {
int num = ts.getNumerator();
int den = ts.getDenominator();
ArrayList<ASTNode> sampleExprs = ts.getExprs();
// TODO: Do the type checking of the expressions
List<String> tabBucketCols = tab.getBucketCols();
int numBuckets = tab.getNumBuckets();
// If there are no sample cols and no bucket cols then throw an error
if (tabBucketCols.size() == 0 && sampleExprs.size() == 0) {
throw new SemanticException(ErrorMsg.NON_BUCKETED_TABLE.getMsg() + " "
+ tab.getTableName());
}
if (num > den) {
throw new SemanticException(
ErrorMsg.BUCKETED_NUMBERATOR_BIGGER_DENOMINATOR.getMsg() + " "
+ tab.getTableName());
}
// check if a predicate is needed
// predicate is needed if either input pruning is not enough
// or if input pruning is not possible
// check if the sample columns are the same as the table bucket columns
boolean colsEqual = true;
if ((sampleExprs.size() != tabBucketCols.size())
&& (sampleExprs.size() != 0)) {
colsEqual = false;
}
for (int i = 0; i < sampleExprs.size() && colsEqual; i++) {
boolean colFound = false;
for (int j = 0; j < tabBucketCols.size() && !colFound; j++) {
if (sampleExprs.get(i).getToken().getType() != HiveParser.TOK_TABLE_OR_COL) {
break;
}
if (((ASTNode) sampleExprs.get(i).getChild(0)).getText()
.equalsIgnoreCase(tabBucketCols.get(j))) {
colFound = true;
}
}
colsEqual = (colsEqual && colFound);
}
// Check if input can be pruned
ts.setInputPruning((sampleExprs == null || sampleExprs.size() == 0 || colsEqual));
// check if input pruning is enough
if ((sampleExprs == null || sampleExprs.size() == 0 || colsEqual)
&& (num == den || (den % numBuckets == 0 || numBuckets % den == 0))) {
// input pruning is enough; add the filter for the optimizer to use it
// later
LOG.info("No need for sample filter");
ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols,
colsEqual, alias, rwsch, qb.getMetaData(), null);
tableOp = OperatorFactory.getAndMakeChild(new FilterDesc(
samplePredicate, true, new sampleDesc(ts.getNumerator(), ts
.getDenominator(), tabBucketCols, true)),
new RowSchema(rwsch.getColumnInfos()), top);
} else {
// need to add filter
// create tableOp to be filterDesc and set as child to 'top'
LOG.info("Need sample filter");
ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols,
colsEqual, alias, rwsch, qb.getMetaData(), null);
tableOp = OperatorFactory.getAndMakeChild(new FilterDesc(
samplePredicate, true),
new RowSchema(rwsch.getColumnInfos()), top);
}
} else {
boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE);
if (testMode) {
String tabName = tab.getTableName();
// has the user explicitly asked not to sample this table
String unSampleTblList = conf
.getVar(HiveConf.ConfVars.HIVETESTMODENOSAMPLE);
String[] unSampleTbls = unSampleTblList.split(",");
boolean unsample = false;
for (String unSampleTbl : unSampleTbls) {
if (tabName.equalsIgnoreCase(unSampleTbl)) {
unsample = true;
}
}
if (!unsample) {
int numBuckets = tab.getNumBuckets();
// If the input table is bucketed, choose the first bucket
if (numBuckets > 0) {
TableSample tsSample = new TableSample(1, numBuckets);
tsSample.setInputPruning(true);
qb.getParseInfo().setTabSample(alias, tsSample);
ExprNodeDesc samplePred = genSamplePredicate(tsSample, tab
.getBucketCols(), true, alias, rwsch, qb.getMetaData(), null);
tableOp = OperatorFactory
.getAndMakeChild(new FilterDesc(samplePred, true,
new sampleDesc(tsSample.getNumerator(), tsSample
.getDenominator(), tab.getBucketCols(), true)),
new RowSchema(rwsch.getColumnInfos()), top);
LOG.info("No need for sample filter");
} else {
// The table is not bucketed, add a dummy filter :: rand()
int freq = conf.getIntVar(HiveConf.ConfVars.HIVETESTMODESAMPLEFREQ);
TableSample tsSample = new TableSample(1, freq);
tsSample.setInputPruning(false);
qb.getParseInfo().setTabSample(alias, tsSample);
LOG.info("Need sample filter");
ExprNodeDesc randFunc = TypeCheckProcFactory.DefaultExprProcessor
.getFuncExprNodeDesc("rand", new ExprNodeConstantDesc(Integer
.valueOf(460476415)));
ExprNodeDesc samplePred = genSamplePredicate(tsSample, null, false,
alias, rwsch, qb.getMetaData(), randFunc);
tableOp = OperatorFactory.getAndMakeChild(new FilterDesc(
samplePred, true),
new RowSchema(rwsch.getColumnInfos()), top);
}
}
}
}
Operator output = putOpInsertMap(tableOp, rwsch);
if (LOG.isDebugEnabled()) {
LOG.debug("Created Table Plan for " + alias + " " + tableOp.toString());
}
return output;
}
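/**
 * Configure the table scan descriptor for an ANALYZE command: enable stats
 * gathering, record the partition columns and the stats aggregation key
 * prefix, and register the table and matching partitions as outputs.
 */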
private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String alias)
throws SemanticException {
if (!qbp.isAnalyzeCommand()) {
tsDesc.setGatherStats(false);
} else {
tsDesc.setGatherStats(true);
String tblName = tab.getTableName();
tableSpec tblSpec = qbp.getTableSpec(alias);
Map<String, String> partSpec = tblSpec.getPartSpec();
if (partSpec != null) {
List<String> cols = new ArrayList<String>();
cols.addAll(partSpec.keySet());
tsDesc.setPartColumns(cols);
}
// Theoretically the key prefix could be any unique string shared
// between TableScanOperator (when publishing) and StatsTask (when aggregating).
// Here we use
//   table_name + partitionSpec
// as the prefix for ease of reading during explain and debugging.
// Currently, partition spec can only be static partition.
String k = tblName + Path.SEPARATOR;
tsDesc.setStatsAggPrefix(k);
// set up a WriteEntity for replication
outputs.add(new WriteEntity(tab, true));
// add WriteEntity for each matching partition
if (tab.isPartitioned()) {
if (partSpec == null) {
throw new SemanticException(ErrorMsg.NEED_PARTITION_SPECIFICATION.getMsg());
}
List<Partition> partitions = qbp.getTableSpec().partitions;
if (partitions != null) {
for (Partition partn : partitions) {
// inputs.add(new ReadEntity(partn)); // is this needed at all?
outputs.add(new WriteEntity(partn, true));
}
}
}
}
}
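/**
 * Generate the operator plan for a query block expression: either the plan
 * of the contained query block, or the union of the plans of its two
 * sub-expressions.
 */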
private Operator genPlan(QBExpr qbexpr) throws SemanticException {
if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) {
return genPlan(qbexpr.getQB());
}
if (qbexpr.getOpcode() == QBExpr.Opcode.UNION) {
Operator qbexpr1Ops = genPlan(qbexpr.getQBExpr1());
Operator qbexpr2Ops = genPlan(qbexpr.getQBExpr2());
return genUnionPlan(qbexpr.getAlias(), qbexpr.getQBExpr1().getAlias(),
qbexpr1Ops, qbexpr.getQBExpr2().getAlias(), qbexpr2Ops);
}
return null;
}
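/**
 * Generate the complete operator plan for a query block: plans for the
 * subqueries and source tables, lateral views, the join tree (if any) and
 * finally the body plan.
 */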
@SuppressWarnings("nls")
public Operator genPlan(QB qb) throws SemanticException {
// First generate all the opInfos for the elements in the from clause
HashMap<String, Operator> aliasToOpInfo = new HashMap<String, Operator>();
// Recurse over the subqueries to fill the subquery part of the plan
for (String alias : qb.getSubqAliases()) {
QBExpr qbexpr = qb.getSubqForAlias(alias);
aliasToOpInfo.put(alias, genPlan(qbexpr));
qbexpr.setAlias(alias);
}
// Recurse over all the source tables
for (String alias : qb.getTabAliases()) {
Operator op = genTablePlan(alias, qb);
aliasToOpInfo.put(alias, op);
}
// For all the source tables that have a lateral view, attach the
// appropriate operators to the TS
genLateralViewPlans(aliasToOpInfo, qb);
Operator srcOpInfo = null;
// process join
if (qb.getParseInfo().getJoinExpr() != null) {
ASTNode joinExpr = qb.getParseInfo().getJoinExpr();
if (joinExpr.getToken().getType() == HiveParser.TOK_UNIQUEJOIN) {
QBJoinTree joinTree = genUniqueJoinTree(qb, joinExpr);
qb.setQbJoinTree(joinTree);
} else {
QBJoinTree joinTree = genJoinTree(qb, joinExpr);
qb.setQbJoinTree(joinTree);
mergeJoinTree(qb);
}
// if any filters are present in the join tree, push them on top of the
// table
pushJoinFilters(qb, qb.getQbJoinTree(), aliasToOpInfo);
srcOpInfo = genJoinPlan(qb, aliasToOpInfo);
} else {
// Now if there are more than 1 sources then we have a join case
// later we can extend this to the union all case as well
srcOpInfo = aliasToOpInfo.values().iterator().next();
}
Operator bodyOpInfo = genBodyPlan(qb, srcOpInfo);
if (LOG.isDebugEnabled()) {
LOG.debug("Created Plan for Query Block " + qb.getId());
}
this.qb = qb;
return bodyOpInfo;
}
/**
* Generates the operator DAG needed to implement lateral views and attaches
* it to the TS operator.
*
* @param aliasToOpInfo
* A mapping from a table alias to the TS operator. This function
* replaces the operator mapping as necessary
* @param qb
* @throws SemanticException
*/
void genLateralViewPlans(HashMap<String, Operator> aliasToOpInfo, QB qb)
throws SemanticException {
Map<String, ArrayList<ASTNode>> aliasToLateralViews = qb.getParseInfo()
.getAliasToLateralViews();
for (Entry<String, Operator> e : aliasToOpInfo.entrySet()) {
String alias = e.getKey();
// See if the alias has a lateral view. If so, chain the lateral view
// operator on
ArrayList<ASTNode> lateralViews = aliasToLateralViews.get(alias);
if (lateralViews != null) {
Operator op = e.getValue();
for (ASTNode lateralViewTree : aliasToLateralViews.get(alias)) {
// There are 2 paths from the TS operator (or a previous LVJ operator)
// to the same LateralViewJoinOperator.
// TS -> SelectOperator(*) -> LateralViewJoinOperator
// TS -> SelectOperator (gets cols for UDTF) -> UDTFOperator0
// -> LateralViewJoinOperator
//
RowResolver lvForwardRR = new RowResolver();
RowResolver source = opParseCtx.get(op).getRowResolver();
for (ColumnInfo col : source.getColumnInfos()) {
if(col.getIsVirtualCol() && col.isHiddenVirtualCol()) {
continue;
}
String[] tabCol = source.reverseLookup(col.getInternalName());
lvForwardRR.put(tabCol[0], tabCol[1], col);
}
Operator lvForward = putOpInsertMap(OperatorFactory.getAndMakeChild(
new LateralViewForwardDesc(), new RowSchema(lvForwardRR.getColumnInfos()),
op), lvForwardRR);
// The order in which the two paths are added is important. The
// lateral view join operator depends on having the select operator
// give it the row first.
// Get the all path by making a select(*).
RowResolver allPathRR = opParseCtx.get(lvForward).getRowResolver();
//Operator allPath = op;
Operator allPath = putOpInsertMap(OperatorFactory.getAndMakeChild(
new SelectDesc(true), new RowSchema(allPathRR.getColumnInfos()),
lvForward), allPathRR);
// Get the UDTF Path
QB blankQb = new QB(null, null, false);
Operator udtfPath = genSelectPlan((ASTNode) lateralViewTree
.getChild(0), blankQb, lvForward);
// add udtf aliases to QB
for (String udtfAlias : blankQb.getAliases()) {
qb.addAlias(udtfAlias);
}
RowResolver udtfPathRR = opParseCtx.get(udtfPath).getRowResolver();
// Merge the two into the lateral view join
// The cols of the merged result will be the combination of both the
// cols of the UDTF path and the cols of the all path. The internal
// names have to be changed to avoid conflicts
RowResolver lateralViewRR = new RowResolver();
ArrayList<String> outputInternalColNames = new ArrayList<String>();
LVmergeRowResolvers(allPathRR, lateralViewRR, outputInternalColNames);
LVmergeRowResolvers(udtfPathRR, lateralViewRR, outputInternalColNames);
// For PPD, we need a column to expression map so that during the walk,
// the processor knows how to transform the internal col names.
// The following steps depend on LVmergeRowResolvers having been called
// in the above order.
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
int i=0;
for (ColumnInfo c : allPathRR.getColumnInfos()) {
String internalName = getColumnInternalName(i);
i++;
colExprMap.put(internalName,
new ExprNodeColumnDesc(c.getType(), c.getInternalName(),
c.getTabAlias(), c.getIsVirtualCol()));
}
Operator lateralViewJoin = putOpInsertMap(OperatorFactory
.getAndMakeChild(new LateralViewJoinDesc(outputInternalColNames),
new RowSchema(lateralViewRR.getColumnInfos()), allPath,
udtfPath), lateralViewRR);
lateralViewJoin.setColumnExprMap(colExprMap);
op = lateralViewJoin;
}
e.setValue(op);
}
}
}
/**
* A helper function that gets all the columns and respective aliases in the
* source and puts them into dest. It renames the internal names of the
* columns based on getColumnInternalName(position).
*
* Note that this helper method relies on RowResolver.getColumnInfos()
* returning the columns in the same order as they will be passed in the
* operator DAG.
*
* @param source
* @param dest
* @param outputColNames
* - a list to which the new internal column names will be added, in
* the same order as in the dest row resolver
*/
private void LVmergeRowResolvers(RowResolver source, RowResolver dest,
ArrayList<String> outputInternalColNames) {
for (ColumnInfo c : source.getColumnInfos()) {
String internalName = getColumnInternalName(outputInternalColNames.size());
outputInternalColNames.add(internalName);
ColumnInfo newCol = new ColumnInfo(internalName, c.getType(), c
.getTabAlias(), c.getIsVirtualCol());
String[] tableCol = source.reverseLookup(c.getInternalName());
String tableAlias = tableCol[0];
String colAlias = tableCol[1];
dest.put(tableAlias, colAlias, newCol);
}
}
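/**
 * Generate the map-reduce tasks for the query: decide whether a plain
 * fetch task suffices, create the move/fetch tasks for the outputs, walk
 * the operator tree to break it into map-reduce stages, and run the
 * physical optimizer over the resulting tasks.
 */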
@SuppressWarnings("nls")
private void genMapRedTasks(QB qb) throws SemanticException {
FetchWork fetch = null;
List<Task<? extends Serializable>> mvTask = new ArrayList<Task<? extends Serializable>>();
FetchTask fetchTask = null;
QBParseInfo qbParseInfo = qb.getParseInfo();
// Does this query need a reduce job?
if (qb.isSelectStarQuery() && qbParseInfo.getDestToClusterBy().isEmpty()
&& qbParseInfo.getDestToDistributeBy().isEmpty()
&& qbParseInfo.getDestToOrderBy().isEmpty()
&& qbParseInfo.getDestToSortBy().isEmpty()) {
boolean noMapRed = false;
Iterator<Map.Entry<String, Table>> iter = qb.getMetaData()
.getAliasToTable().entrySet().iterator();
Table tab = (iter.next()).getValue();
if (!tab.isPartitioned()) {
if (qbParseInfo.getDestToWhereExpr().isEmpty()) {
fetch = new FetchWork(tab.getPath().toString(), Utilities
.getTableDesc(tab), qb.getParseInfo().getOuterQueryLimit());
noMapRed = true;
inputs.add(new ReadEntity(tab));
}
} else {
if (topOps.size() == 1) {
TableScanOperator ts = (TableScanOperator) topOps.values().toArray()[0];
// check if the pruner only contains partition columns
if (PartitionPruner.onlyContainsPartnCols(topToTable.get(ts),
opToPartPruner.get(ts))) {
PrunedPartitionList partsList = null;
try {
partsList = opToPartList.get(ts);
if (partsList == null) {
partsList = PartitionPruner.prune(topToTable.get(ts),
opToPartPruner.get(ts), conf, (String) topOps.keySet()
.toArray()[0], prunedPartitions);
opToPartList.put(ts, partsList);
}
} catch (HiveException e) {
// Has to use full name to make sure it does not conflict with
// org.apache.commons.lang.StringUtils
LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
throw new SemanticException(e.getMessage(), e);
}
// If there is any unknown partition, create a map-reduce job for
// the filter to prune correctly
if ((partsList.getUnknownPartns().size() == 0)) {
List<String> listP = new ArrayList<String>();
List<PartitionDesc> partP = new ArrayList<PartitionDesc>();
Set<Partition> parts = partsList.getConfirmedPartns();
Iterator<Partition> iterParts = parts.iterator();
while (iterParts.hasNext()) {
Partition part = iterParts.next();
listP.add(part.getPartitionPath().toString());
try {
partP.add(Utilities.getPartitionDesc(part));
} catch (HiveException e) {
throw new SemanticException(e.getMessage(), e);
}
inputs.add(new ReadEntity(part));
}
fetch = new FetchWork(listP, partP, qb.getParseInfo()
.getOuterQueryLimit());
noMapRed = true;
}
}
}
}
if (noMapRed) {
if (fetch.getTblDesc() != null) {
PlanUtils.configureTableJobPropertiesForStorageHandler(
fetch.getTblDesc());
}
fetchTask = (FetchTask) TaskFactory.get(fetch, conf);
setFetchTask(fetchTask);
// remove root tasks if any
rootTasks.clear();
return;
}
}
// In case of a select, use a fetch task instead of a move task
if (qb.getIsQuery()) {
if ((!loadTableWork.isEmpty()) || (loadFileWork.size() != 1)) {
throw new SemanticException(ErrorMsg.GENERIC_ERROR.getMsg());
}
String cols = loadFileWork.get(0).getColumns();
String colTypes = loadFileWork.get(0).getColumnTypes();
String resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
TableDesc resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat);
fetch = new FetchWork(new Path(loadFileWork.get(0).getSourceDir()).toString(),
resultTab, qb.getParseInfo().getOuterQueryLimit());
fetchTask = (FetchTask) TaskFactory.get(fetch, conf);
setFetchTask(fetchTask);
} else {
for (LoadTableDesc ltd : loadTableWork) {
Task<MoveWork> tsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false),
conf);
mvTask.add(tsk);
}
boolean oneLoadFile = true;
for (LoadFileDesc lfd : loadFileWork) {
if (qb.isCTAS()) {
assert (oneLoadFile); // should not have more than 1 load file for
// CTAS
// make the movetask's destination directory the table's destination.
String location = qb.getTableDesc().getLocation();
if (location == null) {
// get the table's default location
location = conf.getVar(HiveConf.ConfVars.METASTOREWAREHOUSE);
assert (location.length() > 0);
if (location.charAt(location.length() - 1) != '/') {
location += '/';
}
location += qb.getTableDesc().getTableName().toLowerCase();
}
lfd.setTargetDir(location);
oneLoadFile = false;
}
mvTask.add(TaskFactory.get(new MoveWork(null, null, null, lfd, false),
conf));
}
}
// generate map reduce plans
GenMRProcContext procCtx = new GenMRProcContext(
conf,
new HashMap<Operator<? extends Serializable>, Task<? extends Serializable>>(),
new ArrayList<Operator<? extends Serializable>>(), getParseContext(),
mvTask, rootTasks,
new LinkedHashMap<Operator<? extends Serializable>, GenMapRedCtx>(),
inputs, outputs);
// create a walker which walks the tree in a DFS manner while maintaining
// the operator stack.
// The dispatcher generates the plan from the operator tree
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
opRules.put(new RuleRegExp(new String("R1"), "TS%"), new GenMRTableScan1());
opRules.put(new RuleRegExp(new String("R2"), "TS%.*RS%"),
new GenMRRedSink1());
opRules.put(new RuleRegExp(new String("R3"), "RS%.*RS%"),
new GenMRRedSink2());
opRules.put(new RuleRegExp(new String("R4"), "FS%"), new GenMRFileSink1());
opRules.put(new RuleRegExp(new String("R5"), "UNION%"), new GenMRUnion1());
opRules.put(new RuleRegExp(new String("R6"), "UNION%.*RS%"),
new GenMRRedSink3());
opRules.put(new RuleRegExp(new String("R6"), "MAPJOIN%.*RS%"),
new GenMRRedSink4());
opRules.put(new RuleRegExp(new String("R7"), "TS%.*MAPJOIN%"),
MapJoinFactory.getTableScanMapJoin());
opRules.put(new RuleRegExp(new String("R8"), "RS%.*MAPJOIN%"),
MapJoinFactory.getReduceSinkMapJoin());
opRules.put(new RuleRegExp(new String("R9"), "UNION%.*MAPJOIN%"),
MapJoinFactory.getUnionMapJoin());
opRules.put(new RuleRegExp(new String("R10"), "MAPJOIN%.*MAPJOIN%"),
MapJoinFactory.getMapJoinMapJoin());
opRules.put(new RuleRegExp(new String("R11"), "MAPJOIN%SEL%"),
MapJoinFactory.getMapJoin());
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
Dispatcher disp = new DefaultRuleDispatcher(new GenMROperator(), opRules,
procCtx);
GraphWalker ogw = new GenMapRedWalker(disp);
ArrayList<Node> topNodes = new ArrayList<Node>();
topNodes.addAll(topOps.values());
ogw.startWalking(topNodes, null);
// The reduce sink does not have any children - since the plan has by now
// been broken up into multiple tasks, iterate over all tasks.
// For each task, go over all operators recursively.
for (Task<? extends Serializable> rootTask : rootTasks) {
breakTaskTree(rootTask);
}
// For each task, set the key descriptor for the reducer
for (Task<? extends Serializable> rootTask : rootTasks) {
setKeyDescTaskTree(rootTask);
}
PhysicalContext physicalContext = new PhysicalContext(conf,
getParseContext(), ctx, rootTasks, fetchTask);
PhysicalOptimizer physicalOptimizer = new PhysicalOptimizer(
physicalContext, conf);
physicalOptimizer.optimize();
// For each operator, generate the counters if needed
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEJOBPROGRESS)) {
for (Task<? extends Serializable> rootTask : rootTasks) {
generateCountersTask(rootTask);
}
}
decideExecMode(rootTasks, ctx);
if (qb.isCTAS()) {
// generate a DDL task and make it a dependent task of the leaf
CreateTableDesc crtTblDesc = qb.getTableDesc();
validateCreateTable(crtTblDesc);
// Clear the output for CTAS since we don't need the output from the
// mapredWork; the DDLWork at the tail of the chain will have the output.
getOutputs().clear();
Task<? extends Serializable> crtTblTask = TaskFactory.get(new DDLWork(
getInputs(), getOutputs(), crtTblDesc), conf);
// find all leaf tasks and make the DDLTask a dependent task of all of
// them
HashSet<Task<? extends Serializable>> leaves = new HashSet<Task<? extends Serializable>>();
getLeafTasks(rootTasks, leaves);
assert (leaves.size() > 0);
for (Task<? extends Serializable> task : leaves) {
task.addDependentTask(crtTblTask);
}
}
}
/**
* Find all leaf tasks of the list of root tasks.
*/
private void getLeafTasks(List<Task<? extends Serializable>> rootTasks,
HashSet<Task<? extends Serializable>> leaves) {
for (Task<? extends Serializable> root : rootTasks) {
getLeafTasks(root, leaves);
}
}
private void getLeafTasks(Task<? extends Serializable> task,
HashSet<Task<? extends Serializable>> leaves) {
if (task.getDependentTasks() == null) {
if (!leaves.contains(task)) {
leaves.add(task);
}
} else {
getLeafTasks(task.getDependentTasks(), leaves);
}
}
// loop over all the tasks recursively
private void generateCountersTask(Task<? extends Serializable> task) {
if (task instanceof ExecDriver) {
HashMap<String, Operator<? extends Serializable>> opMap = ((MapredWork) task
.getWork()).getAliasToWork();
if (!opMap.isEmpty()) {
for (Operator<? extends Serializable> op : opMap.values()) {
generateCountersOperator(op);
}
}
Operator<? extends Serializable> reducer = ((MapredWork) task.getWork())
.getReducer();
if (reducer != null) {
LOG.info("Generating counters for operator " + reducer);
generateCountersOperator(reducer);
}
} else if (task instanceof ConditionalTask) {
List<Task<? extends Serializable>> listTasks = ((ConditionalTask) task)
.getListTasks();
for (Task<? extends Serializable> tsk : listTasks) {
generateCountersTask(tsk);
}
}
// Start the counters from scratch - a hack for hadoop 17.
Operator.resetLastEnumUsed();
if (task.getChildTasks() == null) {
return;
}
for (Task<? extends Serializable> childTask : task.getChildTasks()) {
generateCountersTask(childTask);
}
}
private void generateCountersOperator(Operator<? extends Serializable> op) {
op.assignCounterNameToEnum();
if (op.getChildOperators() == null) {
return;
}
for (Operator<? extends Serializable> child : op.getChildOperators()) {
generateCountersOperator(child);
}
}
// loop over all the tasks recursively
private void breakTaskTree(Task<? extends Serializable> task) {
if (task instanceof ExecDriver) {
HashMap<String, Operator<? extends Serializable>> opMap = ((MapredWork) task
.getWork()).getAliasToWork();
if (!opMap.isEmpty()) {
for (Operator<? extends Serializable> op : opMap.values()) {
breakOperatorTree(op);
}
}
} else if (task instanceof ConditionalTask) {
List<Task<? extends Serializable>> listTasks = ((ConditionalTask) task)
.getListTasks();
for (Task<? extends Serializable> tsk : listTasks) {
breakTaskTree(tsk);
}
}
if (task.getChildTasks() == null) {
return;
}
for (Task<? extends Serializable> childTask : task.getChildTasks()) {
breakTaskTree(childTask);
}
}
// loop over all the operators recursively
private void breakOperatorTree(Operator<? extends Serializable> topOp) {
if (topOp instanceof ReduceSinkOperator) {
topOp.setChildOperators(null);
}
if (topOp.getChildOperators() == null) {
return;
}
for (Operator<? extends Serializable> op : topOp.getChildOperators()) {
breakOperatorTree(op);
}
}
// loop over all the tasks recursively
private void setKeyDescTaskTree(Task<? extends Serializable> task) {
if (task instanceof ExecDriver) {
MapredWork work = (MapredWork) task.getWork();
work.deriveExplainAttributes();
HashMap<String, Operator<? extends Serializable>> opMap = work
.getAliasToWork();
if (!opMap.isEmpty()) {
for (Operator<? extends Serializable> op : opMap.values()) {
GenMapRedUtils.setKeyAndValueDesc(work, op);
}
}
} else if (task instanceof ConditionalTask) {
List<Task<? extends Serializable>> listTasks = ((ConditionalTask) task)
.getListTasks();
for (Task<? extends Serializable> tsk : listTasks) {
setKeyDescTaskTree(tsk);
}
}
if (task.getChildTasks() == null) {
return;
}
for (Task<? extends Serializable> childTask : task.getChildTasks()) {
setKeyDescTaskTree(childTask);
}
}
@SuppressWarnings("nls")
public Phase1Ctx initPhase1Ctx() {
Phase1Ctx ctx_1 = new Phase1Ctx();
ctx_1.nextNum = 0;
ctx_1.dest = "reduce";
return ctx_1;
}
@Override
@SuppressWarnings("nls")
public void analyzeInternal(ASTNode ast) throws SemanticException {
reset();
QB qb = new QB(null, null, false);
this.qb = qb;
this.ast = ast;
ASTNode child = ast;
LOG.info("Starting Semantic Analysis");
// analyze create table command
if (ast.getToken().getType() == HiveParser.TOK_CREATETABLE) {
// if it is not CTAS, we don't need to go further and just return
if ((child = analyzeCreateTable(ast, qb)) == null) {
return;
}
} else {
SessionState.get().setCommandType(HiveOperation.QUERY);
}
// analyze create view command
if (ast.getToken().getType() == HiveParser.TOK_CREATEVIEW) {
child = analyzeCreateView(ast, qb);
SessionState.get().setCommandType(HiveOperation.CREATEVIEW);
if (child == null) {
return;
}
viewSelect = child;
}
// continue analyzing from the child ASTNode.
doPhase1(child, qb, initPhase1Ctx());
LOG.info("Completed phase 1 of Semantic Analysis");
getMetaData(qb);
LOG.info("Completed getting MetaData in Semantic Analysis");
// Save the result schema derived from the sink operator produced
// by genPlan. This has the correct column names, which clients
// such as JDBC would prefer instead of the c0, c1 we'll end
// up with later.
Operator sinkOp = genPlan(qb);
resultSchema =
convertRowSchemaToViewSchema(opParseCtx.get(sinkOp).getRowResolver());
if (createVwDesc != null) {
saveViewDefinition();
// Since we're only creating a view (not executing it), we
// don't need to optimize or translate the plan (and in fact, those
// procedures can interfere with the view creation). So
// skip the rest of this method.
ctx.setResDir(null);
ctx.setResFile(null);
return;
}
ParseContext pCtx = new ParseContext(conf, qb, child, opToPartPruner,
opToPartList, topOps, topSelOps, opParseCtx, joinContext, topToTable,
loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx,
listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions,
opToSamplePruner);
Optimizer optm = new Optimizer();
optm.setPctx(pCtx);
optm.initialize(conf);
pCtx = optm.optimize();
init(pCtx);
qb = pCtx.getQB();
// At this point we have the complete operator tree
// from which we want to find the reduce operator
genMapRedTasks(qb);
LOG.info("Completed plan generation");
return;
}
@Override
public List<FieldSchema> getResultSchema() {
return resultSchema;
}
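/**
 * Save the original and expanded text of a CREATE VIEW statement, imposing
 * the user-specified column names and comments on the derived schema when
 * an explicit column list was given.
 */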
private void saveViewDefinition() throws SemanticException {
// Make a copy of the statement's result schema, since we may
// modify it below as part of imposing view column names.
List<FieldSchema> derivedSchema =
new ArrayList<FieldSchema>(resultSchema);
validateColumnNameUniqueness(derivedSchema);
List<FieldSchema> imposedSchema = createVwDesc.getSchema();
if (imposedSchema != null) {
int explicitColCount = imposedSchema.size();
int derivedColCount = derivedSchema.size();
if (explicitColCount != derivedColCount) {
throw new SemanticException(ErrorMsg.VIEW_COL_MISMATCH
.getMsg(viewSelect));
}
}
// Preserve the original view definition as specified by the user.
String originalText = ctx.getTokenRewriteStream().toString(
viewSelect.getTokenStartIndex(), viewSelect.getTokenStopIndex());
createVwDesc.setViewOriginalText(originalText);
// Now expand the view definition with extras such as explicit column
// references; this expanded form is what we'll re-parse when the view is
// referenced later.
unparseTranslator.applyTranslations(ctx.getTokenRewriteStream());
String expandedText = ctx.getTokenRewriteStream().toString(
viewSelect.getTokenStartIndex(), viewSelect.getTokenStopIndex());
if (imposedSchema != null) {
// Merge the names from the imposed schema into the types
// from the derived schema.
StringBuilder sb = new StringBuilder();
sb.append("SELECT ");
int n = derivedSchema.size();
for (int i = 0; i < n; ++i) {
if (i > 0) {
sb.append(", ");
}
FieldSchema fieldSchema = derivedSchema.get(i);
// Modify a copy, not the original
fieldSchema = new FieldSchema(fieldSchema);
derivedSchema.set(i, fieldSchema);
sb.append(HiveUtils.unparseIdentifier(fieldSchema.getName()));
sb.append(" AS ");
String imposedName = imposedSchema.get(i).getName();
sb.append(HiveUtils.unparseIdentifier(imposedName));
fieldSchema.setName(imposedName);
// We don't currently allow imposition of a type
fieldSchema.setComment(imposedSchema.get(i).getComment());
}
sb.append(" FROM (");
sb.append(expandedText);
sb.append(") ");
sb.append(HiveUtils.unparseIdentifier(createVwDesc.getViewName()));
expandedText = sb.toString();
}
createVwDesc.setSchema(derivedSchema);
createVwDesc.setViewExpandedText(expandedText);
}
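/**
 * Convert a row resolver into a list of FieldSchema, using the
 * user-visible column aliases and skipping virtual columns.
 */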
private List<FieldSchema> convertRowSchemaToViewSchema(RowResolver rr) {
List<FieldSchema> fieldSchemas = new ArrayList<FieldSchema>();
for (ColumnInfo colInfo : rr.getColumnInfos()) {
if (colInfo.getIsVirtualCol()) {
continue;
}
String colName = rr.reverseLookup(colInfo.getInternalName())[1];
fieldSchemas.add(new FieldSchema(colName,
colInfo.getType().getTypeName(), null));
}
return fieldSchemas;
}
/**
* Generates an expression node descriptor for the expression passed in the
* arguments. This function uses the row resolver and the metadata information
* that are passed as arguments to resolve the column names to internal names.
*
* @param expr
* The expression
* @param input
* The row resolver
* @return exprNodeDesc
* @throws SemanticException
*/
@SuppressWarnings("nls")
public ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input)
throws SemanticException {
// We recursively create the exprNodeDesc. Base cases: when we encounter
// a column ref, we convert that into an exprNodeColumnDesc; when we
// encounter a constant, we convert that into an exprNodeConstantDesc.
// For others we just build the exprNodeFuncDesc with recursively built
// children.
// If the current subExpression is pre-calculated, as in Group-By etc.
ColumnInfo colInfo = input.getExpression(expr);
if (colInfo != null) {
ASTNode source = input.getExpressionSource(expr);
if (source != null) {
unparseTranslator.addCopyTranslation(expr, source);
}
return new ExprNodeColumnDesc(colInfo.getType(), colInfo
.getInternalName(), colInfo.getTabAlias(), colInfo
.getIsVirtualCol());
}
// Create the walker, the rules dispatcher and the context.
TypeCheckCtx tcCtx = new TypeCheckCtx(input);
tcCtx.setUnparseTranslator(unparseTranslator);
HashMap<Node, Object> nodeOutputs =
TypeCheckProcFactory.genExprNode(expr, tcCtx);
ExprNodeDesc desc = (ExprNodeDesc) nodeOutputs.get(expr);
if (desc == null) {
throw new SemanticException(tcCtx.getError());
}
if (!unparseTranslator.isEnabled()) {
// Not creating a view, so no need to track view expansions.
return desc;
}
for (Map.Entry<Node, Object> entry : nodeOutputs.entrySet()) {
if (!(entry.getKey() instanceof ASTNode)) {
continue;
}
if (!(entry.getValue() instanceof ExprNodeColumnDesc)) {
continue;
}
ASTNode node = (ASTNode) entry.getKey();
ExprNodeColumnDesc columnDesc = (ExprNodeColumnDesc) entry.getValue();
if ((columnDesc.getTabAlias() == null)
|| (columnDesc.getTabAlias().length() == 0)) {
// These aren't real column refs; instead, they are special
// internal expressions used in the representation of aggregation.
continue;
}
String[] tmp = input.reverseLookup(columnDesc.getColumn());
StringBuilder replacementText = new StringBuilder();
replacementText.append(HiveUtils.unparseIdentifier(tmp[0]));
replacementText.append(".");
replacementText.append(HiveUtils.unparseIdentifier(tmp[1]));
unparseTranslator.addTranslation(node, replacementText.toString());
}
return desc;
}
/**
* Gets the table alias for the column from the column name. This function
* throws an exception in case the same column name is present in multiple
* tables. The exception message indicates that the ambiguity could not be
* resolved.
*
* @param qbm
* The metadata where the function looks for the table alias
* @param colName
* The name of the non aliased column
* @param pt
* The parse tree corresponding to the column(this is used for error
* reporting)
* @return String
* @throws SemanticException
*/
static String getTabAliasForCol(QBMetaData qbm, String colName, ASTNode pt)
throws SemanticException {
String tabAlias = null;
boolean found = false;
for (Map.Entry<String, Table> ent : qbm.getAliasToTable().entrySet()) {
for (FieldSchema field : ent.getValue().getAllCols()) {
if (colName.equalsIgnoreCase(field.getName())) {
if (found) {
throw new SemanticException(ErrorMsg.AMBIGUOUS_COLUMN.getMsg(pt));
}
found = true;
tabAlias = ent.getKey();
}
}
}
return tabAlias;
}
@Override
public void validate() throws SemanticException {
// Validate that inputs and outputs have the right protect mode to execute the query
for (ReadEntity readEntity: getInputs()) {
ReadEntity.Type type = readEntity.getType();
if (type != ReadEntity.Type.TABLE &&
type != ReadEntity.Type.PARTITION) {
// In the current implementation this can never happen, but the check is
// kept here to make the logic complete.
continue;
}
Table tbl = readEntity.getTable();
Partition p = readEntity.getPartition();
if (tbl.isOffline()) {
throw new SemanticException(
ErrorMsg.OFFLINE_TABLE_OR_PARTITION.getMsg(
"Table " + tbl.getTableName()));
}
if (type == ReadEntity.Type.PARTITION && p != null && p.isOffline()) {
throw new SemanticException(
ErrorMsg.OFFLINE_TABLE_OR_PARTITION.getMsg(
"Table " + tbl.getTableName() +
" Partition " + p.getName()));
}
}
for (WriteEntity writeEntity: getOutputs()) {
WriteEntity.Type type = writeEntity.getType();
if (type != WriteEntity.Type.TABLE &&
type != WriteEntity.Type.PARTITION) {
continue;
}
Table tbl;
Partition p;
if (type == WriteEntity.Type.PARTITION) {
Partition inputPartition = writeEntity.getPartition();
// If it is a partition, its metadata has not been fetched from the
// metastore yet, so fetch it here.
try {
p = Hive.get().getPartition(
inputPartition.getTable(), inputPartition.getSpec(), false);
if (p != null) {
tbl = p.getTable();
} else {
// if p is null, assume we are inserting into a new partition
tbl = inputPartition.getTable();
}
} catch (HiveException e) {
throw new SemanticException(e);
}
if (p != null && p.isOffline()) {
throw new SemanticException(
ErrorMsg.OFFLINE_TABLE_OR_PARTITION.getMsg(
"Table " + tbl.getTableName() +
" Partition " + p.getName()));
}
} else {
tbl = writeEntity.getTable();
}
if (tbl.isOffline()) {
throw new SemanticException(
ErrorMsg.OFFLINE_TABLE_OR_PARTITION.getMsg(
"Table " + tbl.getTableName()));
}
}
// validate all tasks
for (Task<? extends Serializable> rootTask : rootTasks) {
validate(rootTask);
}
}
private void validate(Task<? extends Serializable> task)
throws SemanticException {
if (task.getChildTasks() == null) {
return;
}
for (Task<? extends Serializable> childTask : task.getChildTasks()) {
validate(childTask);
}
}
/**
* Get the row resolver given an operator.
*/
public RowResolver getRowResolver(Operator opt) {
return opParseCtx.get(opt).getRowResolver();
}
/**
* Add default properties to the table properties map. If a default parameter
* already exists in tblProp, the value from tblProp is kept.
*
* @param tblProp
* The table property map (may be null)
* @return the table property map with the default properties filled in
*/
private Map<String, String> addDefaultProperties(Map<String, String> tblProp) {
Map<String, String> retValue;
if (tblProp == null) {
retValue = new HashMap<String, String>();
} else {
retValue = tblProp;
}
String paraString = HiveConf.getVar(conf, ConfVars.NEWTABLEDEFAULTPARA);
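// The default-parameter string is expected to be a comma-separated list of
// key=value pairs, e.g. (illustration only) "creator=hive,retention=30";
// each key is copied into the table properties only if the user has not
// already set it explicitly.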
if (paraString != null && !paraString.isEmpty()) {
for (String keyValuePair: paraString.split(",")) {
String[] keyValue = keyValuePair.split("=", 2);
if (keyValue.length != 2) {
continue;
}
if (!retValue.containsKey(keyValue[0])) {
retValue.put(keyValue[0], keyValue[1]);
}
}
}
return retValue;
}
/**
* Analyze the create table command. If it is a regular create-table or
* create-table-like statement, we create a DDLWork and return null. If it is
* a create-table-as-select, we record the necessary info, such as the SerDe
* and storage format, in the QB and return the SELECT statement's AST,
* indicating that the rest of the semantic analyzer needs to process the
* select statement with respect to that SerDe and storage format.
*/
private ASTNode analyzeCreateTable(ASTNode ast, QB qb)
throws SemanticException {
String tableName = unescapeIdentifier(ast.getChild(0).getText());
String likeTableName = null;
List<FieldSchema> cols = new ArrayList<FieldSchema>();
List<FieldSchema> partCols = new ArrayList<FieldSchema>();
List<String> bucketCols = new ArrayList<String>();
List<Order> sortCols = new ArrayList<Order>();
int numBuckets = -1;
String comment = null;
String location = null;
Map<String, String> tblProps = null;
boolean ifNotExists = false;
boolean isExt = false;
ASTNode selectStmt = null;
final int CREATE_TABLE = 0; // regular CREATE TABLE
final int CTLT = 1; // CREATE TABLE LIKE ... (CTLT)
final int CTAS = 2; // CREATE TABLE AS SELECT ... (CTAS)
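// Illustration of how statements map to these command types (examples only):
//   CREATE TABLE t (c INT)              -> CREATE_TABLE
//   CREATE TABLE t LIKE s               -> CTLT
//   CREATE TABLE t AS SELECT c FROM s   -> CTAS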
int command_type = CREATE_TABLE;
RowFormatParams rowFormatParams = new RowFormatParams();
StorageFormat storageFormat = new StorageFormat();
AnalyzeCreateCommonVars shared = new AnalyzeCreateCommonVars();
LOG.info("Creating table " + tableName + " position="
+ ast.getCharPositionInLine());
int numCh = ast.getChildCount();
/*
* Check the 1st-level children and do simple semantic checks: 1) CTLT and
* CTAS should not coexist. 2) CTLT or CTAS should not coexist with a column
* list (target table schema). 3) CTAS does not support partitioning (for
* now).
*/
for (int num = 1; num < numCh; num++) {
ASTNode child = (ASTNode) ast.getChild(num);
if (storageFormat.fillStorageFormat(child, shared)) {
continue;
}
switch (child.getToken().getType()) {
case HiveParser.TOK_IFNOTEXISTS:
ifNotExists = true;
break;
case HiveParser.KW_EXTERNAL:
isExt = true;
break;
case HiveParser.TOK_LIKETABLE:
if (child.getChildCount() > 0) {
likeTableName = unescapeIdentifier(child.getChild(0).getText());
if (likeTableName != null) {
if (command_type == CTAS) {
throw new SemanticException(ErrorMsg.CTAS_CTLT_COEXISTENCE
.getMsg());
}
if (cols.size() != 0) {
throw new SemanticException(ErrorMsg.CTLT_COLLST_COEXISTENCE
.getMsg());
}
}
command_type = CTLT;
}
break;
case HiveParser.TOK_QUERY: // CTAS
if (command_type == CTLT) {
throw new SemanticException(ErrorMsg.CTAS_CTLT_COEXISTENCE.getMsg());
}
if (cols.size() != 0) {
throw new SemanticException(ErrorMsg.CTAS_COLLST_COEXISTENCE.getMsg());
}
if (partCols.size() != 0 || bucketCols.size() != 0) {
boolean dynPart = HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING);
if (!dynPart) {
throw new SemanticException(ErrorMsg.CTAS_PARCOL_COEXISTENCE.getMsg());
} else {
// TODO: support dynamic partitions for CTAS; until then this is also an error
throw new SemanticException(ErrorMsg.CTAS_PARCOL_COEXISTENCE.getMsg());
}
}
if (isExt) {
throw new SemanticException(ErrorMsg.CTAS_EXTTBL_COEXISTENCE.getMsg());
}
command_type = CTAS;
selectStmt = child;
break;
case HiveParser.TOK_TABCOLLIST:
cols = getColumns(child);
break;
case HiveParser.TOK_TABLECOMMENT:
comment = unescapeSQLString(child.getChild(0).getText());
break;
case HiveParser.TOK_TABLEPARTCOLS:
partCols = getColumns((ASTNode) child.getChild(0), false);
break;
case HiveParser.TOK_TABLEBUCKETS:
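// The TOK_TABLEBUCKETS node comes from a clause such as (illustration only)
// "CLUSTERED BY (c1) [SORTED BY (c2)] INTO 32 BUCKETS": with two children it
// carries the bucket columns and the bucket count; with three, the sort
// columns appear as the second child and the bucket count as the third.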
bucketCols = getColumnNames((ASTNode) child.getChild(0));
if (child.getChildCount() == 2) {
numBuckets = (Integer.valueOf(child.getChild(1).getText()))
.intValue();
} else {
sortCols = getColumnNamesOrder((ASTNode) child.getChild(1));
numBuckets = (Integer.valueOf(child.getChild(2).getText()))
.intValue();
}
break;
case HiveParser.TOK_TABLEROWFORMAT:
rowFormatParams.analyzeRowFormat(shared, child);
break;
case HiveParser.TOK_TABLELOCATION:
location = unescapeSQLString(child.getChild(0).getText());
break;
case HiveParser.TOK_TABLEPROPERTIES:
tblProps = DDLSemanticAnalyzer.getProps((ASTNode) child.getChild(0));
break;
case HiveParser.TOK_TABLESERIALIZER:
child = (ASTNode) child.getChild(0);
shared.serde = unescapeSQLString(child.getChild(0).getText());
if (child.getChildCount() == 2) {
readProps((ASTNode) (child.getChild(1).getChild(0)),
shared.serdeProps);
}
break;
case HiveParser.TOK_FILEFORMAT_GENERIC:
handleGenericFileFormat(child);
break;
default:
assert false;
}
}
storageFormat.fillDefaultStorageFormat(shared);
if ((command_type == CTAS) && (storageFormat.storageHandler != null)) {
throw new SemanticException(ErrorMsg.CREATE_NON_NATIVE_AS.getMsg());
}
// check for existence of table
if (ifNotExists) {
try {
List<String> tables = db.getTablesByPattern(tableName);
if (tables != null && tables.size() > 0) { // table exists
return null;
}
} catch (HiveException e) {
e.printStackTrace();
}
}
// Handle different types of CREATE TABLE command
CreateTableDesc crtTblDesc = null;
switch (command_type) {
case CREATE_TABLE: // REGULAR CREATE TABLE DDL
tblProps = addDefaultProperties(tblProps);
crtTblDesc = new CreateTableDesc(tableName, isExt, cols, partCols,
bucketCols, sortCols, numBuckets, rowFormatParams.fieldDelim, rowFormatParams.fieldEscape,
rowFormatParams.collItemDelim, rowFormatParams.mapKeyDelim, rowFormatParams.lineDelim, comment,
storageFormat.inputFormat, storageFormat.outputFormat, location, shared.serde,
storageFormat.storageHandler, shared.serdeProps, tblProps, ifNotExists);
validateCreateTable(crtTblDesc);
// outputs is empty, which means this create table happens in the current
// database.
SessionState.get().setCommandType(HiveOperation.CREATETABLE);
rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
crtTblDesc), conf));
break;
case CTLT: // create table like <tbl_name>
CreateTableLikeDesc crtTblLikeDesc = new CreateTableLikeDesc(tableName,
isExt, location, ifNotExists, likeTableName);
SessionState.get().setCommandType(HiveOperation.CREATETABLE);
rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
crtTblLikeDesc), conf));
break;
case CTAS: // create table as select
// Verify that the table does not already exist
try {
if (null != db.getTable(db.getCurrentDatabase(), tableName, false)) {
throw new SemanticException(ErrorMsg.TABLE_ALREADY_EXISTS.getMsg(tableName));
}
} catch (HiveException e) {
throw new SemanticException(e);
}
tblProps = addDefaultProperties(tblProps);
crtTblDesc = new CreateTableDesc(tableName, isExt, cols, partCols,
bucketCols, sortCols, numBuckets, rowFormatParams.fieldDelim, rowFormatParams.fieldEscape,
rowFormatParams.collItemDelim, rowFormatParams.mapKeyDelim, rowFormatParams.lineDelim, comment, storageFormat.inputFormat,
storageFormat.outputFormat, location, shared.serde, storageFormat.storageHandler, shared.serdeProps,
tblProps, ifNotExists);
qb.setTableDesc(crtTblDesc);
SessionState.get().setCommandType(HiveOperation.CREATETABLE_AS_SELECT);
return selectStmt;
default:
throw new SemanticException("Unrecognized command.");
}
return null;
}
private ASTNode analyzeCreateView(ASTNode ast, QB qb)
throws SemanticException {
String tableName = unescapeIdentifier(ast.getChild(0).getText());
List<FieldSchema> cols = null;
boolean ifNotExists = false;
String comment = null;
ASTNode selectStmt = null;
Map<String, String> tblProps = null;
LOG.info("Creating view " + tableName + " position="
+ ast.getCharPositionInLine());
int numCh = ast.getChildCount();
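// For illustration (example only): a statement like
// "CREATE VIEW v (c1, c2) COMMENT 'doc' TBLPROPERTIES ('k'='v') AS SELECT ..."
// contributes TOK_TABCOLNAME, TOK_TABLECOMMENT, TOK_TABLEPROPERTIES and
// TOK_QUERY children, which the loop below collects into cols, comment,
// tblProps and selectStmt respectively.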
for (int num = 1; num < numCh; num++) {
ASTNode child = (ASTNode) ast.getChild(num);
switch (child.getToken().getType()) {
case HiveParser.TOK_IFNOTEXISTS:
ifNotExists = true;
break;
case HiveParser.TOK_QUERY:
selectStmt = child;
break;
case HiveParser.TOK_TABCOLNAME:
cols = getColumns(child);
break;
case HiveParser.TOK_TABLECOMMENT:
comment = unescapeSQLString(child.getChild(0).getText());
break;
case HiveParser.TOK_TABLEPROPERTIES:
tblProps = DDLSemanticAnalyzer.getProps((ASTNode) child.getChild(0));
break;
default:
assert false;
}
}
createVwDesc = new CreateViewDesc(
tableName, cols, comment, tblProps, ifNotExists);
unparseTranslator.enable();
rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
createVwDesc), conf));
return selectStmt;
}
private List<String> validateColumnNameUniqueness(
List<FieldSchema> fieldSchemas) throws SemanticException {
// Ensure there are no duplicate column names. This is currently a simple
// O(n^2) check; it could be optimized later if need be, but it should not
// be a major bottleneck since the number of columns is typically small.
Iterator<FieldSchema> iterCols = fieldSchemas.iterator();
List<String> colNames = new ArrayList<String>();
while (iterCols.hasNext()) {
String colName = iterCols.next().getName();
Iterator<String> iter = colNames.iterator();
while (iter.hasNext()) {
String oldColName = iter.next();
if (colName.equalsIgnoreCase(oldColName)) {
throw new SemanticException(ErrorMsg.DUPLICATE_COLUMN_NAMES
.getMsg(oldColName));
}
}
colNames.add(colName);
}
return colNames;
}
private void validateCreateTable(CreateTableDesc crtTblDesc)
throws SemanticException {
if ((crtTblDesc.getCols() == null) || (crtTblDesc.getCols().size() == 0)) {
// when no columns are given, make sure a SerDe that can supply the columns is specified
if (StringUtils.isEmpty(crtTblDesc.getSerName())
|| !SerDeUtils.shouldGetColsFromSerDe(crtTblDesc.getSerName())) {
throw new SemanticException(ErrorMsg.INVALID_TBL_DDL_SERDE.getMsg());
}
return;
}
if (crtTblDesc.getStorageHandler() == null) {
try {
Class<?> origin = Class.forName(crtTblDesc.getOutputFormat(), true,
JavaUtils.getClassLoader());
Class<? extends HiveOutputFormat> replaced = HiveFileFormatUtils
.getOutputFormatSubstitute(origin);
if (replaced == null) {
throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE
.getMsg());
}
} catch (ClassNotFoundException e) {
throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg());
}
}
List<String> colNames = validateColumnNameUniqueness(crtTblDesc.getCols());
if (crtTblDesc.getBucketCols() != null) {
// all columns referenced in CLUSTERED BY must be valid table columns
Iterator<String> bucketCols = crtTblDesc.getBucketCols().iterator();
while (bucketCols.hasNext()) {
String bucketCol = bucketCols.next();
boolean found = false;
Iterator<String> colNamesIter = colNames.iterator();
while (colNamesIter.hasNext()) {
String colName = colNamesIter.next();
if (bucketCol.equalsIgnoreCase(colName)) {
found = true;
break;
}
}
if (!found) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg());
}
}
}
if (crtTblDesc.getSortCols() != null) {
// all columns referenced in SORTED BY must be valid table columns
Iterator<Order> sortCols = crtTblDesc.getSortCols().iterator();
while (sortCols.hasNext()) {
String sortCol = sortCols.next().getCol();
boolean found = false;
Iterator<String> colNamesIter = colNames.iterator();
while (colNamesIter.hasNext()) {
String colName = colNamesIter.next();
if (sortCol.equalsIgnoreCase(colName)) {
found = true;
break;
}
}
if (!found) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg());
}
}
}
if (crtTblDesc.getPartCols() != null) {
// partitioning columns must not overlap with the regular table columns
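// For example (illustration only), "CREATE TABLE t (ds STRING) PARTITIONED BY
// (ds STRING)" repeats "ds" and would be rejected here with
// COLUMN_REPEATED_IN_PARTITIONING_COLS.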
Iterator<FieldSchema> partColsIter = crtTblDesc.getPartCols().iterator();
while (partColsIter.hasNext()) {
String partCol = partColsIter.next().getName();
Iterator<String> colNamesIter = colNames.iterator();
while (colNamesIter.hasNext()) {
String colName = unescapeIdentifier(colNamesIter.next());
if (partCol.equalsIgnoreCase(colName)) {
throw new SemanticException(
ErrorMsg.COLUMN_REPEATED_IN_PARTITIONING_COLS.getMsg());
}
}
}
}
}
private void decideExecMode(List<Task<? extends Serializable>> rootTasks, Context ctx)
throws SemanticException {
// bypass for explain queries for now
if (ctx.getExplain()) {
return;
}
// user has told us to run in local mode or doesn't want auto-local mode
if (ctx.isLocalOnlyExecutionMode() ||
!conf.getBoolVar(HiveConf.ConfVars.LOCALMODEAUTO)) {
return;
}
final Context lCtx = ctx;
PathFilter p = new PathFilter() {
public boolean accept(Path file) {
return !lCtx.isMRTmpFileURI(file.toUri().getPath());
}
};
List<ExecDriver> mrtasks = Utilities.getMRTasks(rootTasks);
// map-reduce jobs may be run locally based on their data size;
// first find out whether any of the jobs needs to run non-locally
boolean hasNonLocalJob = false;
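// A task is considered eligible for local mode (an assumption about
// MapRedTask.isEligibleForLocalMode, which returns a reason string when the
// task is NOT eligible) roughly when its total input size and reducer count
// stay under the configured auto-local-mode thresholds; any task that fails
// the check forces the whole query to stay in cluster mode.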
for (ExecDriver mrtask: mrtasks) {
try {
ContentSummary inputSummary = Utilities.getInputSummary
(ctx, (MapredWork)mrtask.getWork(), p);
int numReducers = getNumberOfReducers(mrtask.getWork(), conf);
if (LOG.isDebugEnabled()) {
LOG.debug("Task: " + mrtask.getId() + ", Summary: " +
inputSummary.getLength() + "," + inputSummary.getFileCount() + ","
+ numReducers);
}
if (MapRedTask.isEligibleForLocalMode(conf, inputSummary, numReducers) != null) {
hasNonLocalJob = true;
break;
} else {
mrtask.setLocalMode(true);
}
} catch (IOException e) {
throw new SemanticException (e);
}
}
if (!hasNonLocalJob) {
// none of the map-reduce tasks needs to be run non-locally, so the whole
// query can be executed in local mode. Save the current tracker value and
// restore it when done.
ctx.setOriginalTracker(conf.getVar(HiveConf.ConfVars.HADOOPJT));
conf.setVar(HiveConf.ConfVars.HADOOPJT, "local");
console.printInfo("Automatically selecting local only mode for query");
// If all the tasks can be run locally, we can use local disk for
// storing intermediate data.
/**
* This code is commented out pending further testing/development
* for (Task<? extends Serializable> t: rootTasks)
* t.localizeMRTmpFiles(ctx);
*/
}
}
/**
* Make a best guess at the number of reducers that the given map-reduce work
* will use.
*/
private static int getNumberOfReducers(MapredWork mrwork, HiveConf conf) {
if (mrwork.getReducer() == null) {
return 0;
}
if (mrwork.getNumReduceTasks() >= 0) {
return mrwork.getNumReduceTasks();
}
return conf.getIntVar(HiveConf.ConfVars.HADOOPNUMREDUCERS);
}
}