/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hive.ql.exec.vector;

import java.lang.reflect.Constructor;
import java.sql.Date;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.common.type.Decimal128;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
import org.apache.hadoop.hive.ql.exec.FunctionInfo;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.InputExpressionType;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Mode;
import org.apache.hadoop.hive.ql.exec.vector.expressions.*;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFAvgDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCount;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountStar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFSumDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgLong;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxLong;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxString;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinLong;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinString;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdPopDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdPopDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdPopLong;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdSampDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdSampDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdSampLong;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFSumDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFSumLong;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarPopDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarPopDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarPopLong;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampLong;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*;
import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.SettableUDF;
import org.apache.hadoop.hive.ql.udf.UDFConv;
import org.apache.hadoop.hive.ql.udf.UDFHex;
import org.apache.hadoop.hive.ql.udf.UDFSign;
import org.apache.hadoop.hive.ql.udf.UDFToBoolean;
import org.apache.hadoop.hive.ql.udf.UDFToByte;
import org.apache.hadoop.hive.ql.udf.UDFToDouble;
import org.apache.hadoop.hive.ql.udf.UDFToFloat;
import org.apache.hadoop.hive.ql.udf.UDFToInteger;
import org.apache.hadoop.hive.ql.udf.UDFToLong;
import org.apache.hadoop.hive.ql.udf.UDFToShort;
import org.apache.hadoop.hive.ql.udf.UDFToString;
import org.apache.hadoop.hive.ql.udf.generic.*;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

/**
 * Context class for vectorization execution.
 * Its main role is to map column names to column indices and to serve as a
 * factory for building vectorized expressions out of expression descriptors.
 */
public class VectorizationContext {

  private static final Log LOG = LogFactory.getLog(
      VectorizationContext.class.getName());

  VectorExpressionDescriptor vMap;

  //columnName to column position map
  private final Map<String, Integer> columnMap;
  private final int firstOutputColumnIndex;

  public static final Pattern decimalTypePattern = Pattern.compile("decimal.*",
      Pattern.CASE_INSENSITIVE);

  //Map column number to type
  private final OutputColumnManager ocm;

  // The file key is used by operators to retrieve the scratch vectors
  // from mapWork at runtime. Operators that modify the structure of
  // a vectorized row batch need to allocate scratch vectors as well. Every
  // operator that creates a new VectorizationContext should set a unique
  // fileKey.
  private String fileKey = null;

  // Set of UDF classes for type casting data types in row-mode.
  private static Set<Class<?>> castExpressionUdfs = new HashSet<Class<?>>();
  static {
    castExpressionUdfs.add(GenericUDFToDecimal.class);
    castExpressionUdfs.add(GenericUDFToBinary.class);
    castExpressionUdfs.add(GenericUDFToDate.class);
    castExpressionUdfs.add(GenericUDFToUnixTimeStamp.class);
    castExpressionUdfs.add(GenericUDFToUtcTimestamp.class);
    castExpressionUdfs.add(GenericUDFToChar.class);
    castExpressionUdfs.add(GenericUDFToVarchar.class);
    castExpressionUdfs.add(GenericUDFTimestamp.class);
    castExpressionUdfs.add(UDFToByte.class);
    castExpressionUdfs.add(UDFToBoolean.class);
    castExpressionUdfs.add(UDFToDouble.class);
    castExpressionUdfs.add(UDFToFloat.class);
    castExpressionUdfs.add(UDFToString.class);
    castExpressionUdfs.add(UDFToInteger.class);
    castExpressionUdfs.add(UDFToLong.class);
    castExpressionUdfs.add(UDFToShort.class);
  }

  public VectorizationContext(Map<String, Integer> columnMap,
      int initialOutputCol) {
    this.columnMap = columnMap;
    this.ocm = new OutputColumnManager(initialOutputCol);
    this.firstOutputColumnIndex = initialOutputCol;
    vMap = new VectorExpressionDescriptor();
  }
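
  /*
   * Illustrative usage sketch (not part of the original source; the column
   * names, indices, and exprDesc tree below are hypothetical). A context maps
   * batch columns by name, and scratch columns start at initialOutputCol:
   *
   *   Map<String, Integer> cols = new HashMap<String, Integer>();
   *   cols.put("a", 0);
   *   cols.put("b", 1);
   *   VectorizationContext vc = new VectorizationContext(cols, 2);
   *   // exprDesc is an ExprNodeDesc tree built by the planner, e.g. for "a + b".
   *   VectorExpression ve = vc.getVectorExpression(exprDesc, Mode.PROJECTION);
   *   // ve writes its result to a scratch column with index >= 2.
   */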

  /**
   * This constructor inherits the OutputColumnManager from the parent
   * context, so it should be used only by operators that don't create a new
   * vectorized row batch, i.e. operators that want to modify the column-name
   * map without changing the row batch itself.
   */
  public VectorizationContext(VectorizationContext parent) {
    this.columnMap = new HashMap<String, Integer>(parent.columnMap);
    this.ocm = parent.ocm;
    this.firstOutputColumnIndex = parent.firstOutputColumnIndex;
    vMap = new VectorExpressionDescriptor();
  }

  public String getFileKey() {
    return fileKey;
  }

  public void setFileKey(String fileKey) {
    this.fileKey = fileKey;
  }

  protected int getInputColumnIndex(String name) {
    if (!columnMap.containsKey(name)) {
      LOG.error(String.format("The column %s is not in the vectorization context column map.", name));
      // Note: the get() below throws a NullPointerException (via unboxing)
      // for a missing column; the error above is logged first to aid debugging.
    }
    return columnMap.get(name);
  }

  protected int getInputColumnIndex(ExprNodeColumnDesc colExpr) {
    return columnMap.get(colExpr.getColumn());
  }

  private static class OutputColumnManager {
    private final int initialOutputCol;
    private int outputColCount = 0;

    protected OutputColumnManager(int initialOutputCol) {
      this.initialOutputCol = initialOutputCol;
    }

    //The complete list of output columns. These should be added to the
    //Vectorized row batch for processing. The index in the row batch is
    //equal to the index in this array plus initialOutputCol.
    //Start with size 100 and double when needed.
    private String [] outputColumnsTypes = new String[100];

    private final Set<Integer> usedOutputColumns = new HashSet<Integer>();

    int allocateOutputColumn(String columnType) {
      if (initialOutputCol < 0) {
        // This is a test
        return 0;
      }
      int relativeCol = allocateOutputColumnInternal(columnType);
      return initialOutputCol + relativeCol;
    }

    private int allocateOutputColumnInternal(String columnType) {
      for (int i = 0; i < outputColCount; i++) {

        // Re-use an existing, available column of the same required type.
        if (usedOutputColumns.contains(i) ||
            !outputColumnsTypes[i].equalsIgnoreCase(columnType)) {
          continue;
        }
        // Use column i.
        usedOutputColumns.add(i);
        return i;
      }
      // Out of allocated columns; expand the array first if it is full.
      if (outputColCount == outputColumnsTypes.length) {
        outputColumnsTypes = Arrays.copyOf(outputColumnsTypes, 2 * outputColCount);
      }
      int newIndex = outputColCount;
      outputColumnsTypes[outputColCount++] = columnType;
      usedOutputColumns.add(newIndex);
      return newIndex;
    }

    void freeOutputColumn(int index) {
      if (initialOutputCol < 0) {
        // This is a test
        return;
      }
      int colIndex = index - initialOutputCol;
      if (colIndex >= 0) {
        usedOutputColumns.remove(colIndex);
      }
    }
  }
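
  /*
   * Illustrative sketch (not part of the original source; indices assume a
   * hypothetical initialOutputCol of 2). A freed scratch column is reused by
   * the next allocation of the same type:
   *
   *   OutputColumnManager ocm = new OutputColumnManager(2);
   *   int c0 = ocm.allocateOutputColumn("long");    // returns 2
   *   int c1 = ocm.allocateOutputColumn("double");  // returns 3
   *   ocm.freeOutputColumn(c0);                     // relative column 0 is free
   *   int c2 = ocm.allocateOutputColumn("long");    // returns 2 again (reused)
   */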

  private VectorExpression getColumnVectorExpression(ExprNodeColumnDesc exprDesc,
      Mode mode) {
    int columnNum = getInputColumnIndex(exprDesc.getColumn());
    VectorExpression expr = null;
    switch (mode) {
      case FILTER:
        //Important: It will come here only if the column is being used as a boolean
        expr = new SelectColumnIsTrue(columnNum);
        break;
      case PROJECTION:
        expr = new IdentityExpression(columnNum, exprDesc.getTypeString());
        break;
    }
    return expr;
  }

  public VectorExpression[] getVectorExpressions(List<ExprNodeDesc> exprNodes) throws HiveException {
    return getVectorExpressions(exprNodes, Mode.PROJECTION);
  }

  public VectorExpression[] getVectorExpressions(List<ExprNodeDesc> exprNodes, Mode mode)
    throws HiveException {

    int i = 0;
    if (null == exprNodes) {
      return new VectorExpression[0];
    }
    VectorExpression[] ret = new VectorExpression[exprNodes.size()];
    for (ExprNodeDesc e : exprNodes) {
      ret[i++] = getVectorExpression(e, mode);
    }
    return ret;
  }

  public VectorExpression getVectorExpression(ExprNodeDesc exprDesc) throws HiveException {
    return getVectorExpression(exprDesc, Mode.PROJECTION);
  }

  /**
   * Returns a vector expression for a given expression
   * description.
   * @param exprDesc Expression description
   * @param mode Filter or projection mode
   * @return {@link VectorExpression}
   * @throws HiveException
   */
  public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, Mode mode) throws HiveException {
    VectorExpression ve = null;
    if (exprDesc instanceof ExprNodeColumnDesc) {
      ve = getColumnVectorExpression((ExprNodeColumnDesc) exprDesc, mode);
    } else if (exprDesc instanceof ExprNodeGenericFuncDesc) {
      ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) exprDesc;
      if (isCustomUDF(expr) || isNonVectorizedPathUDF(expr)) {
        ve = getCustomUDFExpression(expr);
      } else {

        // Add cast expressions if needed. Child expressions of a UDF may return
        // different data types, which would require converting them before the
        // UDF can be evaluated. For example, adding a decimal column to an
        // integer column requires the integer column to be cast to decimal.
        List<ExprNodeDesc> childExpressions = getChildExpressionsWithImplicitCast(expr.getGenericUDF(),
            exprDesc.getChildren(), exprDesc.getTypeInfo());
        ve = getGenericUdfVectorExpression(expr.getGenericUDF(),
            childExpressions, mode, exprDesc.getTypeInfo());
      }
    } else if (exprDesc instanceof ExprNodeConstantDesc) {
      ve = getConstantVectorExpression(((ExprNodeConstantDesc) exprDesc).getValue(), exprDesc.getTypeInfo(),
          mode);
    }
    if (ve == null) {
      throw new HiveException("Could not vectorize expression: "+exprDesc.getName());
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Input Expression = " + exprDesc.getTypeInfo()
          + ", Vectorized Expression = " + ve.toString());
    }
    return ve;
  }

  /**
   * Given a UDF and its children, return the common type to which the
   * children's types should be cast.
   */
  private TypeInfo getCommonTypeForChildExpressions(GenericUDF genericUdf, List<ExprNodeDesc> children,
      TypeInfo returnType) {
    TypeInfo commonType;
    if (genericUdf instanceof GenericUDFBaseCompare) {

      // Apply comparison rules
      TypeInfo tLeft = children.get(0).getTypeInfo();
      TypeInfo tRight = children.get(1).getTypeInfo();
      commonType = FunctionRegistry.getCommonClassForComparison(tLeft, tRight);
      if (commonType == null) {
        commonType = returnType;
      }
    } else if (genericUdf instanceof GenericUDFIn) {

      // Cast to the type of the first child
      return children.get(0).getTypeInfo();
    } else {
      // The children type should be converted to return type
      commonType = returnType;
    }
    return commonType;
  }

  /**
   * Add cast expressions to the expression tree if needed. The output of a
   * child expression of a given UDF might need a cast if its type differs
   * from the common type the UDF requires.
   *
   * @param genericUDF The given UDF
   * @param children Child expressions of the UDF that might require a cast.
   * @param returnType The return type of the UDF.
   * @return The list of child expressions, with casts added where needed.
   */
  private List<ExprNodeDesc> getChildExpressionsWithImplicitCast(GenericUDF genericUDF,
      List<ExprNodeDesc> children, TypeInfo returnType) throws HiveException {
    if (isExcludedFromCast(genericUDF)) {

      // No implicit cast needed
      return children;
    }
    if (children == null) {
      return null;
    }

    TypeInfo commonType = getCommonTypeForChildExpressions(genericUDF, children, returnType);

    if (commonType == null) {

      // Couldn't determine common type, don't cast
      return children;
    }

    List<ExprNodeDesc> childrenWithCasts = new ArrayList<ExprNodeDesc>();
    boolean atLeastOneCastNeeded = false;
    for (ExprNodeDesc child : children) {
      ExprNodeDesc castExpression = getImplicitCastExpression(genericUDF, child, commonType);
      if (castExpression != null) {
        atLeastOneCastNeeded = true;
        childrenWithCasts.add(castExpression);
      } else {
        childrenWithCasts.add(child);
      }
    }
    if (atLeastOneCastNeeded) {
      return childrenWithCasts;
    } else {
      return children;
    }
  }
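
  /*
   * Illustrative example (hypothetical expressions, not from the original
   * source). For "int_col + decimal_col" the common type is decimal, so the
   * integer child is wrapped in a GenericUDFToDecimal cast while the decimal
   * child is returned unchanged:
   *
   *   children before: [int_col, decimal_col]
   *   children after:  [CAST(int_col AS decimal), decimal_col]
   *
   * updatePrecision() below derives the precision and scale of the cast
   * target from the input type so that no precision is lost.
   */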

  private boolean isExcludedFromCast(GenericUDF genericUDF) {
    boolean ret = castExpressionUdfs.contains(genericUDF.getClass())
        || (genericUDF instanceof GenericUDFRound) || (genericUDF instanceof GenericUDFBetween);

    if (ret) {
      return ret;
    }

    if (genericUDF instanceof GenericUDFBridge) {
      Class<?> udfClass = ((GenericUDFBridge) genericUDF).getUdfClass();
      return castExpressionUdfs.contains(udfClass)
          || UDFSign.class.isAssignableFrom(udfClass);
    }
    return false;
  }

  /**
   * Creates a DecimalTypeInfo object with appropriate precision and scale for the given
   * inputTypeInfo.
   */
  private TypeInfo updatePrecision(TypeInfo inputTypeInfo, DecimalTypeInfo returnType) {
    if (!(inputTypeInfo instanceof PrimitiveTypeInfo)) {
      return returnType;
    }
    PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) inputTypeInfo;
    int precision = getPrecisionForType(ptinfo);
    int scale = HiveDecimalUtils.getScaleForType(ptinfo);
    return new DecimalTypeInfo(precision, scale);
  }

  /**
   * A GenericUDF might need its children's output to be cast to the given
   * castType. This method returns a cast expression that achieves the
   * required casting, or null if no cast is needed or applicable.
   */
  private ExprNodeDesc getImplicitCastExpression(GenericUDF udf, ExprNodeDesc child, TypeInfo castType)
      throws HiveException {
    TypeInfo inputTypeInfo = child.getTypeInfo();
    String inputTypeString = inputTypeInfo.getTypeName();
    String castTypeString = castType.getTypeName();

    if (inputTypeString.equals(castTypeString)) {
      // Nothing to be done
      return null;
    }
    boolean inputTypeDecimal = decimalTypePattern.matcher(inputTypeString).matches();
    boolean castTypeDecimal = decimalTypePattern.matcher(castTypeString).matches();

    if (castTypeDecimal && !inputTypeDecimal) {

      // Cast the input to decimal
      // If castType is decimal, try not to lose precision for numeric types.
      castType = updatePrecision(inputTypeInfo, (DecimalTypeInfo) castType);
      GenericUDFToDecimal castToDecimalUDF = new GenericUDFToDecimal();
      castToDecimalUDF.setTypeInfo(castType);
      List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
      children.add(child);
      ExprNodeDesc desc = new ExprNodeGenericFuncDesc(castType, castToDecimalUDF, children);
      return desc;
    } else if (!castTypeDecimal && inputTypeDecimal) {

      // Cast decimal input to returnType
      GenericUDF genericUdf = getGenericUDFForCast(castType);
      List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
      children.add(child);
      ExprNodeDesc desc = new ExprNodeGenericFuncDesc(castType, genericUdf, children);
      return desc;
    } else {

      // Casts between non-decimal types (e.g. long to double) are needed only
      // in special cases such as COALESCE.
      if (udf instanceof GenericUDFCoalesce) {
        GenericUDF genericUdf = getGenericUDFForCast(castType);
        List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
        children.add(child);
        ExprNodeDesc desc = new ExprNodeGenericFuncDesc(castType, genericUdf, children);
        return desc;
      }
    }
    return null;
  }
 
  private int getPrecisionForType(PrimitiveTypeInfo typeInfo) {
    if (isFloatFamily(typeInfo.getTypeName())) {
      return HiveDecimal.MAX_PRECISION;
    }
    return HiveDecimalUtils.getPrecisionForType(typeInfo);
  }

  private GenericUDF getGenericUDFForCast(TypeInfo castType) throws HiveException {
    UDF udfClass = null;
    GenericUDF genericUdf = null;
    switch (((PrimitiveTypeInfo) castType).getPrimitiveCategory()) {
      case BYTE:
        udfClass = new UDFToByte();
        break;
      case SHORT:
        udfClass = new UDFToShort();
        break;
      case INT:
        udfClass = new UDFToInteger();
        break;
      case LONG:
        udfClass = new UDFToLong();
        break;
      case FLOAT:
        udfClass = new UDFToFloat();
        break;
      case DOUBLE:
        udfClass = new UDFToDouble();
        break;
      case STRING:
        udfClass = new UDFToString();
        break;
      case BOOLEAN:
        udfClass = new UDFToBoolean();
        break;
      case DATE:
        genericUdf = new GenericUDFToDate();
        break;
      case TIMESTAMP:
        genericUdf = new GenericUDFToUnixTimeStamp();
        break;
      case BINARY:
        genericUdf = new GenericUDFToBinary();
        break;
      case DECIMAL:
        genericUdf = new GenericUDFToDecimal();
        break;
    }
    if (genericUdf == null) {
      if (udfClass == null) {
        throw new HiveException("Could not add implicit cast for type "+castType.getTypeName());
      }
      genericUdf = new GenericUDFBridge();
      ((GenericUDFBridge) genericUdf).setUdfClassName(udfClass.getClass().getName());
    }
    if (genericUdf instanceof SettableUDF) {
      ((SettableUDF) genericUdf).setTypeInfo(castType);
    }
    return genericUdf;
  }
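
  /*
   * Illustrative example (not from the original source): for an int target
   * type, this method returns a GenericUDFBridge wrapping UDFToInteger; for a
   * decimal target it returns a GenericUDFToDecimal whose TypeInfo is set
   * through the SettableUDF interface so precision and scale are preserved.
   */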


  /* Return true if this is one of a small set of functions for which
   * it is significantly easier to use the old code path in vectorized
   * mode instead of implementing a new, optimized VectorExpression.
   *
   * Depending on performance requirements and frequency of use, these
   * may be implemented in the future with an optimized VectorExpression.
   */
  public static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr) {
    GenericUDF gudf = expr.getGenericUDF();
    if (gudf instanceof GenericUDFBridge) {
      GenericUDFBridge bridge = (GenericUDFBridge) gudf;
      Class<? extends UDF> udfClass = bridge.getUdfClass();
      if (udfClass.equals(UDFHex.class)
          || udfClass.equals(UDFConv.class)
          || (isCastToIntFamily(udfClass) && arg0Type(expr).equals("string"))
          || (isCastToFloatFamily(udfClass) && arg0Type(expr).equals("string"))
          || (udfClass.equals(UDFToString.class) &&
               (arg0Type(expr).equals("timestamp")
                   || arg0Type(expr).equals("double")
                   || arg0Type(expr).equals("float")))) {
        return true;
      }
    } else if ((gudf instanceof GenericUDFTimestamp && arg0Type(expr).equals("string"))

            /* GenericUDFCase and GenericUDFWhen are implemented with the UDF Adaptor because
             * of their complexity and generality. In the future, variations of these
             * can be optimized to run faster for the vectorized code path. For example,
             * CASE col WHEN 1 then "one" WHEN 2 THEN "two" ELSE "other" END
             * is an example of a GenericUDFCase that has all constant arguments
             * except for the first argument. This is probably a common case and a
             * good candidate for a fast, special-purpose VectorExpression. Then
             * the UDF Adaptor code path could be used as a catch-all for
             * non-optimized general cases.
             */
            || gudf instanceof GenericUDFCase
            || gudf instanceof GenericUDFWhen) {
      return true;
    }
    return false;
  }

  public static boolean isCastToIntFamily(Class<? extends UDF> udfClass) {
    return udfClass.equals(UDFToByte.class)
        || udfClass.equals(UDFToShort.class)
        || udfClass.equals(UDFToInteger.class)
        || udfClass.equals(UDFToLong.class);

    // Boolean is purposely excluded.
  }

  public static boolean isCastToFloatFamily(Class<? extends UDF> udfClass) {
    return udfClass.equals(UDFToDouble.class)
        || udfClass.equals(UDFToFloat.class);
  }

  // Return the type string of the first argument (argument 0).
  public static String arg0Type(ExprNodeGenericFuncDesc expr) {
    return expr.getChildren().get(0).getTypeString();
  }

  // Return true if this is a custom UDF or custom GenericUDF.
  // This is for use only in the planner. It will fail in a task.
  public static boolean isCustomUDF(ExprNodeGenericFuncDesc expr) {
    String udfName = expr.getFuncText();
    if (udfName == null) {
      return false;
    }
    FunctionInfo funcInfo = FunctionRegistry.getFunctionInfo(udfName);
    if (funcInfo == null) {
      return false;
    }
    boolean isNativeFunc = funcInfo.isNative();
    return !isNativeFunc;
  }

  /**
   * Handles only the special case of unary operators on a constant.
   * @param exprDesc
   * @return The same expression if no folding done, else return the constant
   *         expression.
   * @throws HiveException
   */
  ExprNodeDesc foldConstantsForUnaryExpression(ExprNodeDesc exprDesc) throws HiveException {
    if (!(exprDesc instanceof ExprNodeGenericFuncDesc)) {
      return exprDesc;
    }
   
    if (exprDesc.getChildren() == null || (exprDesc.getChildren().size() != 1) ) {
      return exprDesc;
    }

    ExprNodeConstantDesc foldedChild = null;
    if (!( exprDesc.getChildren().get(0) instanceof ExprNodeConstantDesc)) {

      // try recursive folding
      ExprNodeDesc expr = foldConstantsForUnaryExpression(exprDesc.getChildren().get(0));
      if (expr instanceof ExprNodeConstantDesc) {
        foldedChild = (ExprNodeConstantDesc) expr;
      }
    } else {
      foldedChild = (ExprNodeConstantDesc) exprDesc.getChildren().get(0);
    }

    if (foldedChild == null) {
      return exprDesc;
    }

    ObjectInspector childoi = foldedChild.getWritableObjectInspector();
    GenericUDF gudf = ((ExprNodeGenericFuncDesc) exprDesc).getGenericUDF();

    if (gudf instanceof GenericUDFOPNegative || gudf instanceof GenericUDFOPPositive
        || castExpressionUdfs.contains(gudf.getClass())
        || ((gudf instanceof GenericUDFBridge)
            && castExpressionUdfs.contains(((GenericUDFBridge) gudf).getUdfClass()))) {
      ExprNodeEvaluator<?> evaluator = ExprNodeEvaluatorFactory.get(exprDesc);
      ObjectInspector output = evaluator.initialize(childoi);
      Object constant = evaluator.evaluate(null);
      Object java = ObjectInspectorUtils.copyToStandardJavaObject(constant, output);
      return new ExprNodeConstantDesc(exprDesc.getTypeInfo(), java);
    }

    return exprDesc;
  }
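
  /*
   * Illustrative example (hypothetical expression, not from the original
   * source): folding the unary expression tree for "-(3)".
   *
   *   before: GenericUDFOPNegative(ExprNodeConstantDesc(3))
   *   after:  ExprNodeConstantDesc(-3)
   *
   * Nested unary expressions such as "-(-(3))" fold as well, because the
   * method first folds the child recursively and then evaluates the outer
   * UDF against the resulting constant.
   */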

  /* Fold simple unary expressions in all members of the input list and return
   * a new list containing the results.
   */
  private List<ExprNodeDesc> foldConstantsForUnaryExprs(List<ExprNodeDesc> childExpr)
      throws HiveException {
    List<ExprNodeDesc> constantFoldedChildren = new ArrayList<ExprNodeDesc>();
    if (childExpr != null) {
      for (ExprNodeDesc expr : childExpr) {
        expr = this.foldConstantsForUnaryExpression(expr);
        constantFoldedChildren.add(expr);
      }
    }
    return constantFoldedChildren;
  }

  private VectorExpression getConstantVectorExpression(Object constantValue, TypeInfo typeInfo,
      Mode mode) throws HiveException {
    String type = typeInfo.getTypeName();
    String colVectorType = getNormalizedTypeName(type);
    int outCol = -1;
    if (mode == Mode.PROJECTION) {
      outCol = ocm.allocateOutputColumn(colVectorType);
    }
    if (decimalTypePattern.matcher(type).matches()) {
      VectorExpression ve = new ConstantVectorExpression(outCol, (Decimal128) constantValue);
      ve.setOutputType(typeInfo.getTypeName());
      return ve;
    } else if (type.equalsIgnoreCase("long") || type.equalsIgnoreCase("int") ||
        type.equalsIgnoreCase("short") || type.equalsIgnoreCase("byte")) {
      return new ConstantVectorExpression(outCol,
          ((Number) constantValue).longValue());
    } else if (type.equalsIgnoreCase("double") || type.equalsIgnoreCase("float")) {
      return new ConstantVectorExpression(outCol, ((Number) constantValue).doubleValue());
    } else if (type.equalsIgnoreCase("string")) {
      return new ConstantVectorExpression(outCol, ((String) constantValue).getBytes());
    } else if (type.equalsIgnoreCase("boolean")) {
      if (mode == Mode.FILTER) {
        if (((Boolean) constantValue).booleanValue()) {
          return new FilterConstantBooleanVectorExpression(1);
        } else {
          return new FilterConstantBooleanVectorExpression(0);
        }
      } else {
        if (((Boolean) constantValue).booleanValue()) {
          return new ConstantVectorExpression(outCol, 1);
        } else {
          return new ConstantVectorExpression(outCol, 0);
        }
      }
    }
    throw new HiveException("Unsupported constant type: "+type.toString());
  }

  /**
   * Used as a fast path for operations that don't modify their input, like unary +
   * and casting boolean to long. IdentityExpression and its children are always
   * projections.
   */
  private VectorExpression getIdentityExpression(List<ExprNodeDesc> childExprList)
      throws HiveException {
    ExprNodeDesc childExpr = childExprList.get(0);
    int inputCol;
    String colType;
    VectorExpression v1 = null;
    if (childExpr instanceof ExprNodeGenericFuncDesc) {
      v1 = getVectorExpression(childExpr);
      inputCol = v1.getOutputColumn();
      colType = v1.getOutputType();
    } else if (childExpr instanceof ExprNodeColumnDesc) {
      ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) childExpr;
      inputCol = getInputColumnIndex(colDesc.getColumn());
      colType = colDesc.getTypeString();
    } else {
      throw new HiveException("Expression not supported: "+childExpr);
    }
    VectorExpression expr = new IdentityExpression(inputCol, colType);
    if (v1 != null) {
      expr.setChildExpressions(new VectorExpression [] {v1});
    }
    return expr;
  }

  private VectorExpression getVectorExpressionForUdf(Class<?> udf, List<ExprNodeDesc> childExpr, Mode mode,
      TypeInfo returnType) throws HiveException {
    int numChildren = (childExpr == null) ? 0 : childExpr.size();
    if (numChildren > VectorExpressionDescriptor.MAX_NUM_ARGUMENTS) {
      return null;
    }
    VectorExpressionDescriptor.Builder builder = new VectorExpressionDescriptor.Builder();
    builder.setNumArguments(numChildren);
    builder.setMode(mode);
    for (int i = 0; i < numChildren; i++) {
      ExprNodeDesc child = childExpr.get(i);
      builder.setArgumentType(i, child.getTypeString());
      if ((child instanceof ExprNodeGenericFuncDesc) || (child instanceof ExprNodeColumnDesc)) {
        builder.setInputExpressionType(i, InputExpressionType.COLUMN);
      } else if (child instanceof ExprNodeConstantDesc) {
        builder.setInputExpressionType(i, InputExpressionType.SCALAR);
      } else {
        throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName());
      }
    }
    VectorExpressionDescriptor.Descriptor descriptor = builder.build();
    Class<?> vclass = this.vMap.getVectorExpressionClass(udf, descriptor);
    if (vclass == null) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("No vector udf found for "+udf.getSimpleName() + ", descriptor: "+descriptor);
      }
      return null;
    }
    Mode childrenMode = getChildrenMode(mode, udf);
    return createVectorExpression(vclass, childExpr, childrenMode, returnType);
  }

  private VectorExpression createVectorExpression(Class<?> vectorClass,
      List<ExprNodeDesc> childExpr, Mode childrenMode, TypeInfo returnType) throws HiveException {
    int numChildren = childExpr == null ? 0: childExpr.size();
    VectorExpression.Type [] inputTypes = new VectorExpression.Type[numChildren];
    List<VectorExpression> children = new ArrayList<VectorExpression>();
    Object[] arguments = new Object[numChildren];
    try {
      for (int i = 0; i < numChildren; i++) {
        ExprNodeDesc child = childExpr.get(i);
        inputTypes[i] = VectorExpression.Type.getValue(child.getTypeInfo().getTypeName());
        if (child instanceof ExprNodeGenericFuncDesc) {
          VectorExpression vChild = getVectorExpression(child, childrenMode);
          children.add(vChild);
          arguments[i] = vChild.getOutputColumn();
        } else if (child instanceof ExprNodeColumnDesc) {
          int colIndex = getInputColumnIndex((ExprNodeColumnDesc) child);
          if (childrenMode == Mode.FILTER) {
            // In filter mode, the column must be a boolean
            children.add(new SelectColumnIsTrue(colIndex));
          }
          arguments[i] = colIndex;
        } else if (child instanceof ExprNodeConstantDesc) {
          Object scalarValue = getVectorTypeScalarValue((ExprNodeConstantDesc) child);
          arguments[i] = scalarValue;
        } else {
          throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName());
        }
      }
      VectorExpression vectorExpression = instantiateExpression(vectorClass, returnType, arguments);
      if (vectorExpression != null) {
        vectorExpression.setInputTypes(inputTypes);
        if (!children.isEmpty()) {
          vectorExpression.setChildExpressions(children.toArray(new VectorExpression[0]));
        }
      }
      return vectorExpression;
    } catch (Exception ex) {
      throw new HiveException(ex);
    } finally {
      for (VectorExpression ve : children) {
        ocm.freeOutputColumn(ve.getOutputColumn());
      }
    }
  }

  private Mode getChildrenMode(Mode mode, Class<?> udf) {
    if (mode.equals(Mode.FILTER) && (udf.equals(GenericUDFOPAnd.class) || udf.equals(GenericUDFOPOr.class))) {
      return Mode.FILTER;
    }
    return Mode.PROJECTION;
  }

  private VectorExpression instantiateExpression(Class<?> vclass, TypeInfo returnType, Object...args)
      throws HiveException {
    VectorExpression ve = null;
    Constructor<?> ctor = getConstructor(vclass);
    int numParams = ctor.getParameterTypes().length;
    int argsLength = (args == null) ? 0 : args.length;
    try {
      if (numParams == 0) {
        ve = (VectorExpression) ctor.newInstance();
      } else if (numParams == argsLength) {
        ve = (VectorExpression) ctor.newInstance(args);
      } else if (numParams == argsLength + 1) {
        // An additional argument is needed: the output column.
        String outType;

        // Special handling for decimal because decimal types need scale and precision parameter.
        // This special handling should be avoided by using returnType uniformly for all cases.
        if (returnType != null) {
          outType = getNormalizedTypeName(returnType.getTypeName()).toLowerCase();
        } else {
          outType = ((VectorExpression) vclass.newInstance()).getOutputType();
        }
        int outputCol = ocm.allocateOutputColumn(outType);
        Object [] newArgs = Arrays.copyOf(args, numParams);
        newArgs[numParams-1] = outputCol;
        ve = (VectorExpression) ctor.newInstance(newArgs);
        ve.setOutputType(outType);
      }
    } catch (Exception ex) {
      throw new HiveException("Could not instantiate " + vclass.getSimpleName(), ex);
    }
    return ve;
  }
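
  /*
   * Illustrative example (hypothetical vector class, not from the original
   * source). For a constructor (int colNum, int outputColumn) and args {5},
   * numParams == argsLength + 1, so a scratch output column is allocated and
   * appended before instantiation:
   *
   *   args passed in:   {5}
   *   constructor:      MyVectorExpr(int colNum, int outputColumn)
   *   newArgs built:    {5, ocm.allocateOutputColumn(outType)}
   */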

  private VectorExpression getGenericUdfVectorExpression(GenericUDF udf,
      List<ExprNodeDesc> childExpr, Mode mode, TypeInfo returnType) throws HiveException {

    List<ExprNodeDesc> constantFoldedChildren = foldConstantsForUnaryExprs(childExpr);
    childExpr = constantFoldedChildren;
    //First handle special cases
    if (udf instanceof GenericUDFBetween) {
      return getBetweenFilterExpression(childExpr, mode, returnType);
    } else if (udf instanceof GenericUDFIn) {
      return getInExpression(childExpr, mode, returnType);
    } else if (udf instanceof GenericUDFOPPositive) {
      return getIdentityExpression(childExpr);
    } else if (udf instanceof GenericUDFCoalesce) {

      // Coalesce is a special case because it can take variable number of arguments.
      return getCoalesceExpression(childExpr, returnType);
    } else if (udf instanceof GenericUDFBridge) {
      VectorExpression v = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode,
          returnType);
      if (v != null) {
        return v;
      }
    } else if (udf instanceof GenericUDFToDecimal) {
      return getCastToDecimal(childExpr, returnType);
    }

    // Now do a general lookup
    Class<?> udfClass = udf.getClass();
    if (udf instanceof GenericUDFBridge) {
      udfClass = ((GenericUDFBridge) udf).getUdfClass();
    }

    VectorExpression ve = getVectorExpressionForUdf(udfClass, constantFoldedChildren, mode, returnType);

    if (ve == null) {
      throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
    }

    return ve;
  }

  private VectorExpression getCoalesceExpression(List<ExprNodeDesc> childExpr, TypeInfo returnType)
      throws HiveException {
    int[] inputColumns = new int[childExpr.size()];
    VectorExpression[] vectorChildren = null;
    try {
      vectorChildren = getVectorExpressions(childExpr, Mode.PROJECTION);

      int i = 0;
      for (VectorExpression ve : vectorChildren) {
        inputColumns[i++] = ve.getOutputColumn();
      }

      int outColumn = ocm.allocateOutputColumn(getNormalizedTypeName(returnType.getTypeName()));
      VectorCoalesce vectorCoalesce = new VectorCoalesce(inputColumns, outColumn);
      vectorCoalesce.setOutputType(returnType.getTypeName());
      vectorCoalesce.setChildExpressions(vectorChildren);
      return vectorCoalesce;
    } finally {
      // Free the output columns of the child expressions.
      if (vectorChildren != null) {
        for (VectorExpression v : vectorChildren) {
          ocm.freeOutputColumn(v.getOutputColumn());
        }
      }
    }
  }

  /**
   * Create a filter or boolean-valued expression for column IN ( <list-of-constants> )
   */
  private VectorExpression getInExpression(List<ExprNodeDesc> childExpr, Mode mode, TypeInfo returnType)
      throws HiveException {
    ExprNodeDesc colExpr = childExpr.get(0);

    TypeInfo colTypeInfo = colExpr.getTypeInfo();
    String colType = colExpr.getTypeString();

    // prepare arguments for createVectorExpression
    List<ExprNodeDesc> childrenForInList =
        foldConstantsForUnaryExprs(childExpr.subList(1, childExpr.size()));

    /* This method assumes that the IN list has no NULL entries. That is enforced elsewhere,
     * in the Vectorizer class. If NULL is passed in as a list entry, behavior is not defined.
     * If in the future, NULL values are allowed in the IN list, be sure to handle 3-valued
     * logic correctly. E.g. NOT (col IN (null)) should be considered UNKNOWN, so that would
     * become FALSE in the WHERE clause, and cause the row in question to be filtered out.
     * See the discussion in Jira HIVE-5583.
     */

    VectorExpression expr = null;

    // determine class
    Class<?> cl = null;
    if (isIntFamily(colType)) {
      cl = (mode == Mode.FILTER ? FilterLongColumnInList.class : LongColumnInList.class);
      long[] inVals = new long[childrenForInList.size()];
      for (int i = 0; i != inVals.length; i++) {
        inVals[i] = getIntFamilyScalarAsLong((ExprNodeConstantDesc) childrenForInList.get(i));
      }
      expr = createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION, returnType);
      ((ILongInExpr) expr).setInListValues(inVals);
    } else if (isTimestampFamily(colType)) {
      cl = (mode == Mode.FILTER ? FilterLongColumnInList.class : LongColumnInList.class);
      long[] inVals = new long[childrenForInList.size()];
      for (int i = 0; i != inVals.length; i++) {
        inVals[i] = getTimestampScalar(childrenForInList.get(i));
      }
      expr = createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION, returnType);
      ((ILongInExpr) expr).setInListValues(inVals);
    } else if (isStringFamily(colType)) {
      cl = (mode == Mode.FILTER ? FilterStringColumnInList.class : StringColumnInList.class);
      byte[][] inVals = new byte[childrenForInList.size()][];
      for (int i = 0; i != inVals.length; i++) {
        inVals[i] = getStringScalarAsByteArray((ExprNodeConstantDesc) childrenForInList.get(i));
      }
      expr = createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION, returnType);
      ((IStringInExpr) expr).setInListValues(inVals);
    } else if (isFloatFamily(colType)) {
      cl = (mode == Mode.FILTER ? FilterDoubleColumnInList.class : DoubleColumnInList.class);
      double[] inValsD = new double[childrenForInList.size()];
      for (int i = 0; i != inValsD.length; i++) {
        inValsD[i] = getNumericScalarAsDouble(childrenForInList.get(i));
      }
      expr = createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION, returnType);
      ((IDoubleInExpr) expr).setInListValues(inValsD);
    } else if (isDecimalFamily(colType)) {
      cl = (mode == Mode.FILTER ? FilterDecimalColumnInList.class : DecimalColumnInList.class);
      Decimal128[] inValsD = new Decimal128[childrenForInList.size()];
      for (int i = 0; i != inValsD.length; i++) {
        inValsD[i] = (Decimal128) getVectorTypeScalarValue(
            (ExprNodeConstantDesc) childrenForInList.get(i));
      }
      expr = createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION, returnType);
      ((IDecimalInExpr) expr).setInListValues(inValsD);
    } else if (isDateFamily(colType)) {
      cl = (mode == Mode.FILTER ? FilterLongColumnInList.class : LongColumnInList.class);
      long[] inVals = new long[childrenForInList.size()];
      for (int i = 0; i != inVals.length; i++) {
        inVals[i] = (Integer) getVectorTypeScalarValue((ExprNodeConstantDesc) childrenForInList.get(i));
      }
      expr = createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION, returnType);
      ((ILongInExpr) expr).setInListValues(inVals);
    }

    // Return the desired VectorExpression if found. Otherwise, return null to cause
    // execution to fall back to row mode.
    return expr;
  }
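
  /*
   * Illustrative example (hypothetical column, not from the original source):
   * for "bigint_col IN (1, 2, 3)" in FILTER mode the column is int-family, so
   * the method builds a FilterLongColumnInList over the column and installs
   * the constants:
   *
   *   expr = createVectorExpression(FilterLongColumnInList.class, ...);
   *   ((ILongInExpr) expr).setInListValues(new long[] {1L, 2L, 3L});
   */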

  private byte[] getStringScalarAsByteArray(ExprNodeConstantDesc exprNodeConstantDesc)
      throws HiveException {
    Object o = getScalarValue(exprNodeConstantDesc);
    if (!(o instanceof byte[])) {
      throw new HiveException("Expected constant argument of type string");
    }
    return (byte[]) o;
  }

  /**
   * Invoke special handling for expressions that can't be vectorized by regular
   * descriptor based lookup.
   */
  private VectorExpression getGenericUDFBridgeVectorExpression(GenericUDFBridge udf,
      List<ExprNodeDesc> childExpr, Mode mode, TypeInfo returnType) throws HiveException {
    Class<? extends UDF> cl = udf.getUdfClass();
    if (isCastToIntFamily(cl)) {
      return getCastToLongExpression(childExpr);
    } else if (cl.equals(UDFToBoolean.class)) {
      return getCastToBoolean(childExpr);
    } else if (isCastToFloatFamily(cl)) {
      return getCastToDoubleExpression(cl, childExpr, returnType);
    } else if (cl.equals(UDFToString.class)) {
      return getCastToString(childExpr, returnType);
    }
    return null;
  }

  private VectorExpression getCastToDecimal(List<ExprNodeDesc> childExpr, TypeInfo returnType)
      throws HiveException {
    ExprNodeDesc child = childExpr.get(0);
    String inputType = child.getTypeString();
    if (child instanceof ExprNodeConstantDesc) {
      // Return a constant vector expression
      Object constantValue = ((ExprNodeConstantDesc) child).getValue();
      Decimal128 decimalValue = castConstantToDecimal(constantValue, child.getTypeInfo());
      return getConstantVectorExpression(decimalValue, returnType, Mode.PROJECTION);
    }
    if (isIntFamily(inputType)) {
      return createVectorExpression(CastLongToDecimal.class, childExpr, Mode.PROJECTION, returnType);
    } else if (isFloatFamily(inputType)) {
      return createVectorExpression(CastDoubleToDecimal.class, childExpr, Mode.PROJECTION, returnType);
    } else if (decimalTypePattern.matcher(inputType).matches()) {
      return createVectorExpression(CastDecimalToDecimal.class, childExpr, Mode.PROJECTION,
          returnType);
    } else if (isStringFamily(inputType)) {
      return createVectorExpression(CastStringToDecimal.class, childExpr, Mode.PROJECTION, returnType);
    } else if (isDatetimeFamily(inputType)) {
      return createVectorExpression(CastTimestampToDecimal.class, childExpr, Mode.PROJECTION, returnType);
    }
    throw new HiveException("Unhandled cast input type: " + inputType);
  }

  private Decimal128 castConstantToDecimal(Object scalar, TypeInfo type) throws HiveException {
    PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type;
    String typename = type.getTypeName();
    Decimal128 d = new Decimal128();
    int scale = HiveDecimalUtils.getScaleForType(ptinfo);
    switch (ptinfo.getPrimitiveCategory()) {
      case FLOAT:
        float floatVal = ((Float) scalar).floatValue();
        d.update(floatVal, (short) scale);
        break;
      case DOUBLE:
        double doubleVal = ((Double) scalar).doubleValue();
        d.update(doubleVal, (short) scale);
        break;
      case BYTE:
        byte byteVal = ((Byte) scalar).byteValue();
        d.update(byteVal, (short) scale);
        break;
      case SHORT:
        short shortVal = ((Short) scalar).shortValue();
        d.update(shortVal, (short) scale);
        break;
      case INT:
        int intVal = ((Integer) scalar).intValue();
        d.update(intVal, (short) scale);
        break;
      case LONG:
        long longVal = ((Long) scalar).longValue();
        d.update(longVal, (short) scale);
        break;
      case DECIMAL:
        HiveDecimal decimalVal = (HiveDecimal) scalar;
        d.update(decimalVal.unscaledValue(), (short) scale);
        break;
      default:
        throw new HiveException("Unsupported type "+typename+" for cast to Decimal128");
    }
    return d;
  }

  private VectorExpression getCastToString(List<ExprNodeDesc> childExpr, TypeInfo returnType)
      throws HiveException {
    String inputType = childExpr.get(0).getTypeString();
    if (inputType.equals("boolean")) {
      // Boolean must come before the integer family. It's a special case.
      return createVectorExpression(CastBooleanToStringViaLongToString.class, childExpr, Mode.PROJECTION, null);
    } else if (isIntFamily(inputType)) {
      return createVectorExpression(CastLongToString.class, childExpr, Mode.PROJECTION, null);
    } else if (isDecimalFamily(inputType)) {
      return createVectorExpression(CastDecimalToString.class, childExpr, Mode.PROJECTION, returnType);
    } else if (isDateFamily(inputType)) {
      return createVectorExpression(CastDateToString.class, childExpr, Mode.PROJECTION, returnType);
    }
    /* The string type is deliberately omitted -- the planner removes string to string casts.
     * Timestamp, float, and double types are handled by the row-mode code path.
     * See isNonVectorizedPathUDF.
     */

    throw new HiveException("Unhandled cast input type: " + inputType);
  }

  private VectorExpression getCastToDoubleExpression(Class<?> udf, List<ExprNodeDesc> childExpr,
      TypeInfo returnType) throws HiveException {
    String inputType = childExpr.get(0).getTypeString();
    if (isIntFamily(inputType)) {
      return createVectorExpression(CastLongToDouble.class, childExpr, Mode.PROJECTION, returnType);
    } else if (inputType.equals("timestamp")) {
      return createVectorExpression(CastTimestampToDoubleViaLongToDouble.class, childExpr, Mode.PROJECTION,
          returnType);
    } else if (isFloatFamily(inputType)) {

      // float types require no conversion, so use a no-op
      return getIdentityExpression(childExpr);
    }
    // The string type is deliberately omitted -- it's handled elsewhere. See isNonVectorizedPathUDF.

    return null;
  }

  private VectorExpression getCastToBoolean(List<ExprNodeDesc> childExpr)
      throws HiveException {
    String inputType = childExpr.get(0).getTypeString();
    // Long and double are handled using descriptors, string needs to be specially handled.
    if (inputType.equals("string")) {
      // string casts to false if it is 0 characters long, otherwise true
      VectorExpression lenExpr = createVectorExpression(StringLength.class, childExpr,
          Mode.PROJECTION, null);

      int outputCol = ocm.allocateOutputColumn("integer");
      VectorExpression lenToBoolExpr =
          new CastLongToBooleanViaLongToLong(lenExpr.getOutputColumn(), outputCol);
      lenToBoolExpr.setChildExpressions(new VectorExpression[] {lenExpr});
      ocm.freeOutputColumn(lenExpr.getOutputColumn());
      return lenToBoolExpr;
    }
    // cast(booleanExpr as boolean) case is omitted because planner removes it as a no-op

    return null;
  }

  private VectorExpression getCastToLongExpression(List<ExprNodeDesc> childExpr)
      throws HiveException {
    String inputType = childExpr.get(0).getTypeString();
    // Float family, timestamp are handled via descriptor based lookup, int family needs
    // special handling.
    if (isIntFamily(inputType)) {
      // integer and boolean types require no conversion, so use a no-op
      return getIdentityExpression(childExpr);
    }
    // The string type is deliberately omitted -- it's handled elsewhere. See isNonVectorizedPathUDF.

    return null;
  }

  /* Get a [NOT] BETWEEN filter expression. This is treated as a special case
   * because the NOT is actually specified in the expression tree as the first argument,
   * and we don't want any runtime cost for that. So creating the VectorExpression
   * needs to be done differently than the standard way where all arguments are
   * passed to the VectorExpression constructor.
   */
  private VectorExpression getBetweenFilterExpression(List<ExprNodeDesc> childExpr, Mode mode, TypeInfo returnType)
      throws HiveException {

    if (mode == Mode.PROJECTION) {

      // Projection mode is not yet supported for [NOT] BETWEEN. Return null so Vectorizer
      // knows to revert to row-at-a-time execution.
      return null;
    }

    boolean notKeywordPresent = (Boolean) ((ExprNodeConstantDesc) childExpr.get(0)).getValue();
    ExprNodeDesc colExpr = childExpr.get(1);

    // The children after NOT might need a cast. Get a common type for the two
    // comparisons. Casting for BETWEEN is handled here as a special case because
    // the first child represents the NOT flag and doesn't need a cast.
    TypeInfo commonType = FunctionRegistry.getCommonClassForComparison(childExpr.get(1).getTypeInfo(),
        childExpr.get(2).getTypeInfo());
    if (commonType == null) {

      // Can't vectorize
      return null;
    }
    commonType = FunctionRegistry.getCommonClassForComparison(commonType, childExpr.get(3).getTypeInfo());
    if (commonType == null) {

      // Can't vectorize
      return null;
    }

    List<ExprNodeDesc> castChildren = new ArrayList<ExprNodeDesc>();

    for (ExprNodeDesc desc: childExpr.subList(1, 4)) {
      if (commonType.equals(desc.getTypeInfo())) {
        castChildren.add(desc);
      } else {
        GenericUDF castUdf = getGenericUDFForCast(commonType);
        ExprNodeGenericFuncDesc engfd = new ExprNodeGenericFuncDesc(commonType, castUdf,
            Arrays.asList(new ExprNodeDesc[] { desc }));
        castChildren.add(engfd);
      }
    }
    String colType = commonType.getTypeName();

    // prepare arguments for createVectorExpression
    List<ExprNodeDesc> childrenAfterNot = foldConstantsForUnaryExprs(castChildren);

    // determine class
    Class<?> cl = null;
    if (isIntFamily(colType) && !notKeywordPresent) {
      cl = FilterLongColumnBetween.class;
    } else if (isIntFamily(colType) && notKeywordPresent) {
      cl = FilterLongColumnNotBetween.class;
    } else if (isFloatFamily(colType) && !notKeywordPresent) {
      cl = FilterDoubleColumnBetween.class;
    } else if (isFloatFamily(colType) && notKeywordPresent) {
      cl = FilterDoubleColumnNotBetween.class;
    } else if (colType.equals("string") && !notKeywordPresent) {
      cl = FilterStringColumnBetween.class;
    } else if (colType.equals("string") && notKeywordPresent) {
      cl = FilterStringColumnNotBetween.class;
    } else if (colType.equals("timestamp")) {

      // Get timestamp boundary values as longs instead of the expected strings
      long left = getTimestampScalar(childExpr.get(2));
      long right = getTimestampScalar(childExpr.get(3));
      childrenAfterNot = new ArrayList<ExprNodeDesc>();
      childrenAfterNot.add(colExpr);
      childrenAfterNot.add(new ExprNodeConstantDesc(left));
      childrenAfterNot.add(new ExprNodeConstantDesc(right));
      if (notKeywordPresent) {
        cl = FilterLongColumnNotBetween.class;
      } else {
        cl = FilterLongColumnBetween.class;
      }
    } else if (isDecimalFamily(colType) && !notKeywordPresent) {
      cl = FilterDecimalColumnBetween.class;
    } else if (isDecimalFamily(colType) && notKeywordPresent) {
      cl = FilterDecimalColumnNotBetween.class;
    } else if (isDateFamily(colType) && !notKeywordPresent) {
      cl = FilterLongColumnBetween.class;
    } else if (isDateFamily(colType) && notKeywordPresent) {
      cl = FilterLongColumnNotBetween.class;
    }
    if (cl == null) {

      // Unsupported column type; return null so execution falls back to row mode.
      return null;
    }
    return createVectorExpression(cl, childrenAfterNot, Mode.PROJECTION, returnType);
  }
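
  /*
   * Illustrative example (hypothetical expression, not from the original
   * source): for "int_col NOT BETWEEN 10 AND 20" the planner produces
   *
   *   childExpr = [true (NOT flag), int_col, 10, 20]
   *
   * The leading boolean selects FilterLongColumnNotBetween, and only the last
   * three children (cast to their common type) become constructor arguments,
   * so the NOT itself has no runtime cost.
   */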

  /*
   * Return vector expression for a custom (i.e. not built-in) UDF.
   */
  private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr)
      throws HiveException {

    List<ExprNodeDesc> childExprList = expr.getChildren();

    // argument descriptors
    VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[expr.getChildren().size()];
    for (int i = 0; i < argDescs.length; i++) {
      argDescs[i] = new VectorUDFArgDesc();
    }

    // positions of variable arguments (columns or non-constant expressions)
    List<Integer> variableArgPositions = new ArrayList<Integer>();

    // Column numbers of batch corresponding to expression result arguments
    List<Integer> exprResultColumnNums = new ArrayList<Integer>();

    // Prepare children
    List<VectorExpression> vectorExprs = new ArrayList<VectorExpression>();

    for (int i = 0; i < childExprList.size(); i++) {
      ExprNodeDesc child = childExprList.get(i);
      if (child instanceof ExprNodeGenericFuncDesc) {
        VectorExpression e = getVectorExpression(child, Mode.PROJECTION);
        vectorExprs.add(e);
        variableArgPositions.add(i);
        exprResultColumnNums.add(e.getOutputColumn());
        argDescs[i].setVariable(e.getOutputColumn());
      } else if (child instanceof ExprNodeColumnDesc) {
        variableArgPositions.add(i);
        argDescs[i].setVariable(getInputColumnIndex(((ExprNodeColumnDesc) child).getColumn()));
      } else if (child instanceof ExprNodeConstantDesc) {

        // this is a constant
        argDescs[i].setConstant((ExprNodeConstantDesc) child);
      } else {
        throw new HiveException("Unable to vectorize Custom UDF");
      }
    }

    // Allocate the output column and get its column number.
    String resultType = expr.getTypeInfo().getTypeName();
    String resultColVectorType = getNormalizedTypeName(resultType);
    int outputCol = ocm.allocateOutputColumn(resultColVectorType);

    // Make vectorized operator
    VectorExpression ve = new VectorUDFAdaptor(expr, outputCol, resultColVectorType, argDescs);

    // Set child expressions
    VectorExpression[] childVEs = null;
    if (!exprResultColumnNums.isEmpty()) {
      childVEs = new VectorExpression[exprResultColumnNums.size()];
      for (int i = 0; i < childVEs.length; i++) {
        childVEs[i] = vectorExprs.get(i);
      }
    }
    ve.setChildExpressions(childVEs);

    // Free output columns if inputs have non-leaf expression trees.
    for (Integer i : exprResultColumnNums) {
      ocm.freeOutputColumn(i);
    }
    return ve;
  }

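  // Note: only the exact type name "string" is treated as the string family
  // here; char/varchar are not recognized by this predicate.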
  public static boolean isStringFamily(String resultType) {
    return resultType.equalsIgnoreCase("string");
  }

  public static boolean isDatetimeFamily(String resultType) {
    return resultType.equalsIgnoreCase("timestamp") || resultType.equalsIgnoreCase("date");
  }

  public static boolean isTimestampFamily(String resultType) {
    return resultType.equalsIgnoreCase("timestamp");
  }
 
  public static boolean isDateFamily(String resultType) {
    return resultType.equalsIgnoreCase("date");
  }
 
  // return true if this is any kind of float
  public static boolean isFloatFamily(String resultType) {
    return resultType.equalsIgnoreCase("double")
        || resultType.equalsIgnoreCase("float");
  }

  // Return true if this data type is handled in the output vector as an integer.
  public static boolean isIntFamily(String resultType) {
    return resultType.equalsIgnoreCase("tinyint")
        || resultType.equalsIgnoreCase("smallint")
        || resultType.equalsIgnoreCase("int")
        || resultType.equalsIgnoreCase("bigint")
        || resultType.equalsIgnoreCase("boolean")
        || resultType.equalsIgnoreCase("long");
  }

  public static boolean isDecimalFamily(String colType) {
      return decimalTypePattern.matcher(colType).matches();
  }

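  // Convert a constant expression node into the Java object a vectorized
  // scalar expression expects: strings become UTF-8 byte arrays, booleans
  // become 1/0, decimals become Decimal128; other types pass through as is.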
  private Object getScalarValue(ExprNodeConstantDesc constDesc)
      throws HiveException {
    if (constDesc.getTypeString().equalsIgnoreCase("String")) {
      try {
         byte[] bytes = ((String) constDesc.getValue()).getBytes("UTF-8");
         return bytes;
      } catch (Exception ex) {
        throw new HiveException(ex);
      }
    } else if (constDesc.getTypeString().equalsIgnoreCase("boolean")) {
      if (constDesc.getValue().equals(Boolean.valueOf(true))) {
        return 1;
      } else {
        return 0;
      }
    } else if (decimalTypePattern.matcher(constDesc.getTypeString()).matches()) {
      HiveDecimal hd = (HiveDecimal) constDesc.getValue();
      Decimal128 dvalue = new Decimal128();
      dvalue.update(hd.unscaledValue(), (short) hd.scale());
      return dvalue;
    } else {
      return constDesc.getValue();
    }
  }

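  // Narrowing helper: extract an int-family constant as a Java long.
  // Only Integer and Long scalar values are accepted.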
  private long getIntFamilyScalarAsLong(ExprNodeConstantDesc constDesc)
      throws HiveException {
    Object o = getScalarValue(constDesc);
    if (o instanceof Integer) {
      return (Integer) o;
    } else if (o instanceof Long) {
      return (Long) o;
    }
    throw new HiveException("Unexpected type when converting to long : "+o.getClass().getSimpleName());
  }

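  // Widening helper: extract any numeric constant (double, float, int, long)
  // as a Java double, for use with double-typed vector expressions.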
  private double getNumericScalarAsDouble(ExprNodeDesc constDesc)
      throws HiveException {
    Object o = getScalarValue((ExprNodeConstantDesc) constDesc);
    if (o instanceof Double) {
      return (Double) o;
    } else if (o instanceof Float) {
      return (Float) o;
    } else if (o instanceof Integer) {
      return (Integer) o;
    } else if (o instanceof Long) {
      return (Long) o;
    }
    throw new HiveException("Unexpected type when converting to double");
  }

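  // Map a constant to the primitive representation used inside vector
  // batches: timestamps become nanoseconds since the epoch (long) and dates
  // become days since the epoch. Illustrative example (not from the original
  // source): a timestamp one second past the epoch yields 1000000000L.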
  private Object getVectorTypeScalarValue(ExprNodeConstantDesc constDesc) throws HiveException {
    String t = constDesc.getTypeInfo().getTypeName();
    if (isTimestampFamily(t)) {
      return TimestampUtils.getTimeNanoSec((Timestamp) getScalarValue(constDesc));
    } else if (isDateFamily(t)) {
      return DateWritable.dateToDays((Date) getScalarValue(constDesc));
    } else {
      return getScalarValue(constDesc);
    }
  }

  // Get a timestamp as a long in nanoseconds, from a string constant or a cast-to-timestamp expression.
  private long getTimestampScalar(ExprNodeDesc expr) throws HiveException {
    if (expr instanceof ExprNodeGenericFuncDesc &&
        ((ExprNodeGenericFuncDesc) expr).getGenericUDF() instanceof GenericUDFTimestamp) {
      return evaluateCastToTimestamp(expr);
    }
    if (!(expr instanceof ExprNodeConstantDesc)) {
      throw new HiveException("Constant timestamp value expected for expression argument. " +
          "Non-constant argument not supported for vectorization.");
    }
    ExprNodeConstantDesc constExpr = (ExprNodeConstantDesc) expr;
    if (constExpr.getTypeString().equals("string")) {

      // create expression tree with type cast from string to timestamp
      ExprNodeGenericFuncDesc expr2 = new ExprNodeGenericFuncDesc();
      GenericUDFTimestamp f = new GenericUDFTimestamp();
      expr2.setGenericUDF(f);
      ArrayList<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
      children.add(expr);
      expr2.setChildren(children);

      // initialize and evaluate
      return evaluateCastToTimestamp(expr2);
    }

    throw new HiveException("Udf: unhandled constant type for scalar argument. "
        + "Expecting string.");
  }

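  // Fold a cast-to-timestamp expression at compile time: evaluate it with a
  // row-mode evaluator (the argument is constant, so a null row suffices)
  // and convert the resulting Timestamp to nanoseconds.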
  private long evaluateCastToTimestamp(ExprNodeDesc expr) throws HiveException {
    ExprNodeGenericFuncDesc expr2 = (ExprNodeGenericFuncDesc) expr;
    ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(expr2);
    ObjectInspector output = evaluator.initialize(null);
    Object constant = evaluator.evaluate(null);
    Object java = ObjectInspectorUtils.copyToStandardJavaObject(constant, output);

    if (!(java instanceof Timestamp)) {
      throw new HiveException("Udf: failed to convert to timestamp");
    }
    Timestamp ts = (Timestamp) java;
    return TimestampUtils.getTimeNanoSec(ts);
  }

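  // Choose the constructor used to instantiate a vector expression class:
  // the sole declared constructor when there is exactly one, otherwise the
  // first declared constructor that is not the no-argument default.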
  private Constructor<?> getConstructor(Class<?> cl) throws HiveException {
    try {
      Constructor<?> [] ctors = cl.getDeclaredConstructors();
      if (ctors.length == 1) {
        return ctors[0];
      }
      Constructor<?> defaultCtor = cl.getConstructor();
      for (Constructor<?> ctor : ctors) {
        if (!ctor.equals(defaultCtor)) {
          return ctor;
        }
      }
      throw new HiveException("Only default constructor found");
    } catch (Exception ex) {
      throw new HiveException(ex);
    }
  }

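  // Normalize a Hive type name to the vector column type that backs it.
  // Illustrative mappings (not exhaustive):
  //   "float" / "double"  -> "Double"
  //   "string"            -> "String"
  //   "decimal(10,2)"     -> "decimal(10,2)"  (decimals keep scale/precision)
  //   everything else     -> "Long"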
  static String getNormalizedTypeName(String colType){
    String normalizedType = null;
    if (colType.equalsIgnoreCase("Double") || colType.equalsIgnoreCase("Float")) {
      normalizedType = "Double";
    } else if (colType.equalsIgnoreCase("String")) {
      normalizedType = "String";
    } else if (decimalTypePattern.matcher(colType).matches()) {
      // Return the decimal type as is; it includes scale and precision.
      normalizedType = colType;
    } else {
      normalizedType = "Long";
    }
    return normalizedType;
  }

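  // Lookup table for vectorized aggregates. Each row is
  // {UDAF name, normalized input type, implementing class}; a null input
  // type matches the parameterless count(*).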
  static Object[][] aggregatesDefinition = {
    {"min",       "Long",   VectorUDAFMinLong.class},
    {"min",       "Double", VectorUDAFMinDouble.class},
    {"min",       "String", VectorUDAFMinString.class},
    {"min",       "Decimal",VectorUDAFMinDecimal.class},
    {"max",       "Long",   VectorUDAFMaxLong.class},
    {"max",       "Double", VectorUDAFMaxDouble.class},
    {"max",       "String", VectorUDAFMaxString.class},
    {"max",       "Decimal",VectorUDAFMaxDecimal.class},
    {"count",     null,     VectorUDAFCountStar.class},
    {"count",     "Long",   VectorUDAFCount.class},
    {"count",     "Double", VectorUDAFCount.class},
    {"count",     "String", VectorUDAFCount.class},
    {"count",     "Decimal",VectorUDAFCount.class},
    {"sum",       "Long",   VectorUDAFSumLong.class},
    {"sum",       "Double", VectorUDAFSumDouble.class},
    {"sum",       "Decimal",VectorUDAFSumDecimal.class},
    {"avg",       "Long",   VectorUDAFAvgLong.class},
    {"avg",       "Double", VectorUDAFAvgDouble.class},
    {"avg",       "Decimal",VectorUDAFAvgDecimal.class},
    {"variance""Long",   VectorUDAFVarPopLong.class},
    {"var_pop",   "Long",   VectorUDAFVarPopLong.class},
    {"variance""Double", VectorUDAFVarPopDouble.class},
    {"var_pop",   "Double", VectorUDAFVarPopDouble.class},
    {"variance""Decimal",VectorUDAFVarPopDecimal.class},
    {"var_pop",   "Decimal",VectorUDAFVarPopDecimal.class},
    {"var_samp""Long",   VectorUDAFVarSampLong.class},
    {"var_samp" , "Double", VectorUDAFVarSampDouble.class},
    {"var_samp" , "Decimal",VectorUDAFVarSampDecimal.class},
    {"std",       "Long",   VectorUDAFStdPopLong.class},
    {"stddev",    "Long",   VectorUDAFStdPopLong.class},
    {"stddev_pop","Long",   VectorUDAFStdPopLong.class},
    {"std",       "Double", VectorUDAFStdPopDouble.class},
    {"stddev",    "Double", VectorUDAFStdPopDouble.class},
    {"stddev_pop","Double", VectorUDAFStdPopDouble.class},
    {"std",       "Decimal",VectorUDAFStdPopDecimal.class},
    {"stddev",    "Decimal",VectorUDAFStdPopDecimal.class},
    {"stddev_pop","Decimal",VectorUDAFStdPopDecimal.class},
    {"stddev_samp","Long",  VectorUDAFStdSampLong.class},
    {"stddev_samp","Double",VectorUDAFStdSampDouble.class},
    {"stddev_samp","Decimal",VectorUDAFStdSampDecimal.class},
  };

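  /*
   * Build the vectorized aggregate for an AggregationDesc: vectorize the
   * parameter expressions, normalize the type of the first parameter, then
   * look up (name, input type) in aggregatesDefinition and instantiate the
   * matching class reflectively through its (VectorExpression) constructor.
   */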
  public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc)
      throws HiveException {

    ArrayList<ExprNodeDesc> paramDescList = desc.getParameters();
    VectorExpression[] vectorParams = new VectorExpression[paramDescList.size()];

    for (int i = 0; i < paramDescList.size(); ++i) {
      ExprNodeDesc exprDesc = paramDescList.get(i);
      vectorParams[i] = this.getVectorExpression(exprDesc, Mode.PROJECTION);
    }

    String aggregateName = desc.getGenericUDAFName();
    String inputType = null;

    if (paramDescList.size() > 0) {
      ExprNodeDesc inputExpr = paramDescList.get(0);
      inputType = getNormalizedTypeName(inputExpr.getTypeString());
      if (decimalTypePattern.matcher(inputType).matches()) {
        inputType = "Decimal";
      }
    }

    for (Object[] aggDef : aggregatesDefinition) {
      if (aggregateName.equalsIgnoreCase((String) aggDef[0]) &&
          ((aggDef[1] == null && inputType == null) ||
          (aggDef[1] != null && aggDef[1].equals(inputType)))) {
        Class<? extends VectorAggregateExpression> aggClass =
            (Class<? extends VectorAggregateExpression>) (aggDef[2]);
        try {
          Constructor<? extends VectorAggregateExpression> ctor =
              aggClass.getConstructor(VectorExpression.class);
          VectorAggregateExpression aggExpr = ctor.newInstance(
              vectorParams.length > 0 ? vectorParams[0] : null);
          aggExpr.init(desc);
          return aggExpr;
        } catch (Exception e) {
          throw new HiveException("Internal exception for vector aggregate : \"" +
               aggregateName + "\" for type: \"" + inputType + "", e);
        }
      }
    }

    throw new HiveException("Vector aggregate not implemented: \"" + aggregateName +
        "\" for type: \"" + inputType + "");
  }

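  // Report the scratch (output) columns allocated by this context, keyed by
  // their absolute column index in the batch.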
  public Map<Integer, String> getOutputColumnTypeMap() {
    Map<Integer, String> map = new HashMap<Integer, String>();
    for (int i = 0; i < ocm.outputColCount; i++) {
      String type = ocm.outputColumnsTypes[i];
      map.put(i+this.firstOutputColumnIndex, type);
    }
    return map;
  }

  public Map<String, Integer> getColumnMap() {
    return columnMap;
  }

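  // Register a name-to-column mapping, rejecting any attempt to remap an
  // existing name to a different column.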
  public void addToColumnMap(String columnName, int outputColumn) throws HiveException {
    if (columnMap.containsKey(columnName) && (columnMap.get(columnName) != outputColumn)) {
      throw new HiveException(String.format("Column %s is already mapped to %d. Cannot remap to %d.",
          columnName, columnMap.get(columnName), outputColumn));
    }
    columnMap.put(columnName, outputColumn);
  }
}