Examples of ReduceSinkDesc

Examples of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc

  private static boolean[] getPruneReduceSinkOpRetainFlags(
      List<String> retainedParentOpOutputCols, ReduceSinkOperator reduce) {
    ReduceSinkDesc reduceConf = reduce.getConf();
    java.util.ArrayList<ExprNodeDesc> originalValueEval = reduceConf
        .getValueCols();
    boolean[] flags = new boolean[originalValueEval.size()];
    for (int i = 0; i < originalValueEval.size(); i++) {
      flags[i] = false;
      List<String> current = originalValueEval.get(i).getCols();
View Full Code Here
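
The loop body is truncated above. As a dependency-free sketch, the flags could be filled by keeping a value expression only when every column it references appears in retainedParentOpOutputCols; since the original continuation is not shown, the handling of column-free expressions here is an assumption:

import java.util.List;

// Sketch only: a plausible completion of the truncated loop above. The
// treatment of expressions that reference no columns is an assumption,
// not the original code.
final class RetainFlagSketch {
  static boolean[] retainFlags(List<List<String>> colsPerValueExpr,
                               List<String> retainedParentOpOutputCols) {
    boolean[] flags = new boolean[colsPerValueExpr.size()];
    for (int i = 0; i < flags.length; i++) {
      List<String> current = colsPerValueExpr.get(i);
      if (current == null || current.isEmpty()) {
        flags[i] = true; // assumed: column-free expressions cost nothing to keep
      } else {
        flags[i] = retainedParentOpOutputCols.containsAll(current);
      }
    }
    return flags;
  }
}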

Examples of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc

    return flags;
  }

  private static void pruneReduceSinkOperator(boolean[] retainFlags,
      ReduceSinkOperator reduce, ColumnPrunerProcCtx cppCtx) throws SemanticException {
    ReduceSinkDesc reduceConf = reduce.getConf();
    Map<String, ExprNodeDesc> oldMap = reduce.getColumnExprMap();
    LOG.info("RS " + reduce.getIdentifier() + " oldColExprMap: " + oldMap);
    RowResolver oldRR = cppCtx.getOpToParseCtxMap().get(reduce).getRowResolver();
    ArrayList<ColumnInfo> signature = oldRR.getRowSchema().getSignature();

    List<String> valueColNames = reduceConf.getOutputValueColumnNames();
    ArrayList<String> newValueColNames = new ArrayList<String>();

    List<ExprNodeDesc> keyExprs = reduceConf.getKeyCols();
    List<ExprNodeDesc> valueExprs = reduceConf.getValueCols();
    ArrayList<ExprNodeDesc> newValueExprs = new ArrayList<ExprNodeDesc>();

    for (int i = 0; i < retainFlags.length; i++) {
      String outputCol = valueColNames.get(i);
      ExprNodeDesc outputColExpr = valueExprs.get(i);
      if (!retainFlags[i]) {
        String[] nm = oldRR.reverseLookup(outputCol);
        if (nm == null) {
          outputCol = Utilities.ReduceField.VALUE.toString() + "." + outputCol;
          nm = oldRR.reverseLookup(outputCol);
        }

        // Only remove information of a column if it is not a key,
        // i.e. this column is not appearing in keyExprs of the RS
        if (ExprNodeDescUtils.indexOf(outputColExpr, keyExprs) == -1) {
          ColumnInfo colInfo = oldRR.getFieldMap(nm[0]).remove(nm[1]);
          oldRR.getInvRslvMap().remove(colInfo.getInternalName());
          oldMap.remove(outputCol);
          signature.remove(colInfo);
        }

      } else {
        newValueColNames.add(outputCol);
        newValueExprs.add(outputColExpr);
      }
    }

    reduceConf.setOutputValueColumnNames(newValueColNames);
    reduceConf.setValueCols(newValueExprs);
    TableDesc newValueTable = PlanUtils.getReduceValueTableDesc(PlanUtils
        .getFieldSchemasFromColumnList(reduceConf.getValueCols(),
        newValueColNames, 0, ""));
    reduceConf.setValueSerializeInfo(newValueTable);
    LOG.info("RS " + reduce.getIdentifier() + " newColExprMap: " + oldMap);
  }
View Full Code Here
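
Rewiring the value side touches three pieces of ReduceSinkDesc state that must stay in sync: the output value column names, the value expressions, and the value serialization TableDesc. A minimal sketch using only the setter and PlanUtils calls shown above (the wrapper class and method name are illustrative):

import java.util.ArrayList;

import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;

final class ValueSideRewireSketch {
  // Update names, expressions, and serialization info together, as
  // pruneReduceSinkOperator does above.
  static void rewire(ReduceSinkOperator reduce,
                     ArrayList<String> newValueColNames,
                     ArrayList<ExprNodeDesc> newValueExprs) {
    ReduceSinkDesc reduceConf = reduce.getConf();
    reduceConf.setOutputValueColumnNames(newValueColNames);
    reduceConf.setValueCols(newValueExprs);
    // Regenerate the cached value TableDesc so the serialized schema
    // matches the pruned column list.
    TableDesc newValueTable = PlanUtils.getReduceValueTableDesc(
        PlanUtils.getFieldSchemasFromColumnList(
            reduceConf.getValueCols(), newValueColNames, 0, ""));
    reduceConf.setValueSerializeInfo(newValueTable);
  }
}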

Examples of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc

      ReduceSinkOperator op = (ReduceSinkOperator) nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      HashMap<Operator<? extends OperatorDesc>, OpParseContext> opToParseCtxMap = cppCtx
          .getOpToParseCtxMap();
      RowResolver redSinkRR = opToParseCtxMap.get(op).getRowResolver();
      ReduceSinkDesc conf = op.getConf();
      List<Operator<? extends OperatorDesc>> childOperators = op
          .getChildOperators();
      List<Operator<? extends OperatorDesc>> parentOperators = op
          .getParentOperators();

      List<String> colLists = new ArrayList<String>();
      ArrayList<ExprNodeDesc> keys = conf.getKeyCols();
      for (ExprNodeDesc key : keys) {
        colLists = Utilities.mergeUniqElems(colLists, key.getCols());
      }

      if ((childOperators.size() == 1)
          && (childOperators.get(0) instanceof JoinOperator)) {
        assert parentOperators.size() == 1;
        Operator<? extends OperatorDesc> par = parentOperators.get(0);
        JoinOperator childJoin = (JoinOperator) childOperators.get(0);
        RowResolver parRR = opToParseCtxMap.get(par).getRowResolver();
        List<String> childJoinCols = cppCtx.getJoinPrunedColLists().get(
            childJoin).get((byte) conf.getTag());
        boolean[] flags = new boolean[conf.getValueCols().size()];
        for (int i = 0; i < flags.length; i++) {
          flags[i] = false;
        }
        if (childJoinCols != null && childJoinCols.size() > 0) {
          Map<String, ExprNodeDesc> exprMap = op.getColumnExprMap();
          for (String childCol : childJoinCols) {
            ExprNodeDesc desc = exprMap.get(childCol);
            int index = conf.getValueCols().indexOf(desc);
            flags[index] = true;
            String[] nm = redSinkRR.reverseLookup(childCol);
            if (nm != null) {
              ColumnInfo cInfo = parRR.get(nm[0], nm[1]);
              if (!colLists.contains(cInfo.getInternalName())) {
                colLists.add(cInfo.getInternalName());
              }
            }
          }
        }
        Collections.sort(colLists);
        pruneReduceSinkOperator(flags, op, cppCtx);
      } else if ((childOperators.size() == 1)
          && (childOperators.get(0) instanceof ExtractOperator)
          && (childOperators.get(0).getChildOperators().size() == 1)
          && (childOperators.get(0).getChildOperators().get(0) instanceof PTFOperator)
          && ((PTFOperator) childOperators.get(0)
              .getChildOperators().get(0)).getConf().forWindowing()) {

        /*
         * For an RS followed by Extract & PTFOp for windowing,
         * do the same thing as above: reconstruct the value column list
         * based on what the PTFOp requires.
         */

        assert parentOperators.size() == 1;

        PTFOperator ptfOp = (PTFOperator) childOperators.get(0).getChildOperators().get(0);
        List<String> childCols = cppCtx.getPrunedColList(ptfOp);
        boolean[] flags = new boolean[conf.getValueCols().size()];
        for (int i = 0; i < flags.length; i++) {
          flags[i] = false;
        }
        if (childCols != null && childCols.size() > 0) {
          ArrayList<String> outColNames = op.getConf().getOutputValueColumnNames();
          for (int i = 0; i < outColNames.size(); i++) {
            if (childCols.contains(outColNames.get(i))) {
              ExprNodeDesc exprNode = op.getConf().getValueCols().get(i);
              flags[i] = true;
              Utilities.mergeUniqElems(colLists, exprNode.getCols());
            }
          }
        }
        Collections.sort(colLists);
        pruneReduceSinkOperator(flags, op, cppCtx);
      } else {
        // Reduce Sink contains the columns needed - no need to aggregate from
        // children
        ArrayList<ExprNodeDesc> vals = conf.getValueCols();
        for (ExprNodeDesc val : vals) {
          colLists = Utilities.mergeUniqElems(colLists, val.getCols());
        }
      }

View Full Code Here
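
Both branches above reduce to the same pattern: build a boolean retain mask over the RS value columns from whatever the child still needs. A condensed, dependency-free restatement of the PTF branch's mask computation:

import java.util.List;

final class RetainMaskSketch {
  // Keep value column i exactly when the child operator still needs its
  // output name; a null or empty child list retains nothing.
  static boolean[] mask(List<String> outColNames, List<String> childCols) {
    boolean[] flags = new boolean[outColNames.size()];
    if (childCols == null || childCols.isEmpty()) {
      return flags; // all false, matching the snippet's default initialization
    }
    for (int i = 0; i < outColNames.size(); i++) {
      flags[i] = childCols.contains(outColNames.get(i));
    }
    return flags;
  }
}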

Examples of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc

    Map<Byte, List<ExprNodeDesc>> keyExprMap  = new HashMap<Byte, List<ExprNodeDesc>>();
    List<Operator<? extends OperatorDesc>> parentOps = joinOp.getParentOperators();
    // get the join keys from parent ReduceSink operators
    for (Operator<? extends OperatorDesc> parentOp : parentOps) {
      ReduceSinkDesc rsconf = ((ReduceSinkOperator)parentOp).getConf();
      Byte tag = (byte) rsconf.getTag();
      List<ExprNodeDesc> keys = rsconf.getKeyCols();
      keyExprMap.put(tag, keys);
    }

    context.setKeyExprMap(keyExprMap);
    // Make a deep copy of the aliases so that they are not changed in the context
View Full Code Here
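
Each parent ReduceSink carries an integer tag identifying its side of the join, so the tag-to-keys map can be rebuilt from any join operator's parents. A minimal sketch using only the getters shown above (the wrapper class is illustrative):

import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;

final class JoinKeySketch {
  // Map each join side's tag to the key expressions its ReduceSink emits.
  static Map<Byte, List<ExprNodeDesc>> keyExprMap(
      List<Operator<? extends OperatorDesc>> parentOps) {
    Map<Byte, List<ExprNodeDesc>> keyExprMap =
        new HashMap<Byte, List<ExprNodeDesc>>();
    for (Operator<? extends OperatorDesc> parentOp : parentOps) {
      ReduceSinkDesc rsConf = ((ReduceSinkOperator) parentOp).getConf();
      keyExprMap.put((byte) rsConf.getTag(), rsConf.getKeyCols());
    }
    return keyExprMap;
  }
}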

Examples of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc

    // keys[i] -> ArrayList<exprNodeDesc> for the i-th join operator key list
    ArrayList<ArrayList<ExprNodeDesc>> keys = new ArrayList<ArrayList<ExprNodeDesc>>();
    int keyLength = 0;
    for (int i = 0; i < right.length; i++) {
      Operator oi = (i == 0 && right[i] == null ? left : right[i]);
      ReduceSinkDesc now = ((ReduceSinkOperator) (oi)).getConf();
      if (i == 0) {
        keyLength = now.getKeyCols().size();
      } else {
        assert (keyLength == now.getKeyCols().size());
      }
      keys.add(now.getKeyCols());
    }
    // implicit type conversion hierarchy
    for (int k = 0; k < keyLength; k++) {
      // Find the common class for type conversion
      TypeInfo commonType = keys.get(0).get(k).getTypeInfo();
      for (int i = 1; i < right.length; i++) {
        TypeInfo a = commonType;
        TypeInfo b = keys.get(i).get(k).getTypeInfo();
        commonType = FunctionRegistry.getCommonClassForComparison(a, b);
        if (commonType == null) {
          throw new SemanticException(
              "Cannot do equality join on different types: " + a.getTypeName()
                  + " and " + b.getTypeName());
        }
      }
      // Add implicit type conversion if necessary
      for (int i = 0; i < right.length; i++) {
        if (TypeInfoUtils.isConversionRequiredForComparison(
                keys.get(i).get(k).getTypeInfo(),
                commonType)) {
          keys.get(i).set(
              k,
              ParseUtils.createConversionCast(
                  keys.get(i).get(k), (PrimitiveTypeInfo)commonType));
        }
      }
    }
    // regenerate keySerializationInfo because the ReduceSinkOperator's
    // output key types might have changed.
    for (int i = 0; i < right.length; i++) {
      Operator oi = (i == 0 && right[i] == null ? left : right[i]);
      ReduceSinkDesc now = ((ReduceSinkOperator) (oi)).getConf();

      now.setKeySerializeInfo(PlanUtils.getReduceKeyTableDesc(PlanUtils
          .getFieldSchemasFromColumnList(now.getKeyCols(), "joinkey"), now
          .getOrder()));
    }
  }
View Full Code Here
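
The inner loop folds the per-branch key types at one key position into a single comparison type. Isolated as a sketch (only getCommonClassForComparison is taken from the snippet; the helper class is illustrative):

import java.util.List;

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

final class CommonKeyTypeSketch {
  // Fold the key types of one key position across all join branches into
  // the common comparison type, failing when no such type exists.
  static TypeInfo commonComparisonType(List<TypeInfo> keyTypes)
      throws SemanticException {
    TypeInfo commonType = keyTypes.get(0);
    for (int i = 1; i < keyTypes.size(); i++) {
      TypeInfo b = keyTypes.get(i);
      TypeInfo next = FunctionRegistry.getCommonClassForComparison(commonType, b);
      if (next == null) {
        throw new SemanticException("Cannot do equality join on different types: "
            + commonType.getTypeName() + " and " + b.getTypeName());
      }
      commonType = next;
    }
    return commonType;
  }
}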

Examples of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc


  private static boolean[] getPruneReduceSinkOpRetainFlags(
      List<String> retainedParentOpOutputCols, ReduceSinkOperator reduce) {
    ReduceSinkDesc reduceConf = reduce.getConf();
    java.util.ArrayList<ExprNodeDesc> originalValueEval = reduceConf
        .getValueCols();
    boolean[] flags = new boolean[originalValueEval.size()];
    for (int i = 0; i < originalValueEval.size(); i++) {
      flags[i] = false;
      List<String> current = originalValueEval.get(i).getCols();
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc

    return flags;
  }

  private static void pruneReduceSinkOperator(boolean[] retainFlags,
      ReduceSinkOperator reduce, ColumnPrunerProcCtx cppCtx) throws SemanticException {
    ReduceSinkDesc reduceConf = reduce.getConf();
    Map<String, ExprNodeDesc> oldMap = reduce.getColumnExprMap();
    Map<String, ExprNodeDesc> newMap = new HashMap<String, ExprNodeDesc>();
    ArrayList<ColumnInfo> sig = new ArrayList<ColumnInfo>();
    RowResolver oldRR = cppCtx.getOpToParseCtxMap().get(reduce).getRowResolver();
    RowResolver newRR = new RowResolver();
    ArrayList<String> originalValueOutputColNames = reduceConf
        .getOutputValueColumnNames();
    java.util.ArrayList<ExprNodeDesc> originalValueEval = reduceConf
        .getValueCols();
    ArrayList<String> newOutputColNames = new ArrayList<String>();
    java.util.ArrayList<ExprNodeDesc> newValueEval = new ArrayList<ExprNodeDesc>();
    for (int i = 0; i < retainFlags.length; i++) {
      if (retainFlags[i]) {
        newValueEval.add(originalValueEval.get(i));
        String outputCol = originalValueOutputColNames.get(i);
        newOutputColNames.add(outputCol);
        String[] nm = oldRR.reverseLookup(outputCol);
        if (nm == null) {
          outputCol = Utilities.ReduceField.VALUE.toString() + "." + outputCol;
          nm = oldRR.reverseLookup(outputCol);
        }
        newMap.put(outputCol, oldMap.get(outputCol));
        ColumnInfo colInfo = oldRR.get(nm[0], nm[1]);
        newRR.put(nm[0], nm[1], colInfo);
        sig.add(colInfo);
      }
    }

    ArrayList<ExprNodeDesc> keyCols = reduceConf.getKeyCols();
    List<String> keys = new ArrayList<String>();
    RowResolver parResolver = cppCtx.getOpToParseCtxMap().get(
        reduce.getParentOperators().get(0)).getRowResolver();
    for (int i = 0; i < keyCols.size(); i++) {
      keys = Utilities.mergeUniqElems(keys, keyCols.get(i).getCols());
    }
    for (int i = 0; i < keys.size(); i++) {
      String outputCol = keys.get(i);
      String[] nm = parResolver.reverseLookup(outputCol);
      ColumnInfo colInfo = oldRR.get(nm[0], nm[1]);
      if (colInfo != null) {
        newRR.put(nm[0], nm[1], colInfo);
      }
    }

    cppCtx.getOpToParseCtxMap().get(reduce).setRowResolver(newRR);
    reduce.setColumnExprMap(newMap);
    reduce.getSchema().setSignature(sig);
    reduceConf.setOutputValueColumnNames(newOutputColNames);
    reduceConf.setValueCols(newValueEval);
    TableDesc newValueTable = PlanUtils.getReduceValueTableDesc(PlanUtils
        .getFieldSchemasFromColumnList(reduceConf.getValueCols(),
        newOutputColNames, 0, ""));
    reduceConf.setValueSerializeInfo(newValueTable);
  }
View Full Code Here
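
Both prune variants use the same reverse-lookup fallback: an internal value column may be registered under a VALUE.-prefixed alias, so a failed lookup is retried with the prefix. Extracted as a sketch (the helper class is illustrative):

import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.parse.RowResolver;

final class ReverseLookupSketch {
  // Returns {tableAlias, columnAlias}, or null if the column cannot be
  // resolved even under the "VALUE." prefix.
  static String[] resolve(RowResolver rr, String outputCol) {
    String[] nm = rr.reverseLookup(outputCol);
    if (nm == null) {
      nm = rr.reverseLookup(
          Utilities.ReduceField.VALUE.toString() + "." + outputCol);
    }
    return nm;
  }
}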

Examples of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc

      ReduceSinkOperator op = (ReduceSinkOperator) nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      HashMap<Operator<? extends Serializable>, OpParseContext> opToParseCtxMap = cppCtx
          .getOpToParseCtxMap();
      RowResolver redSinkRR = opToParseCtxMap.get(op).getRowResolver();
      ReduceSinkDesc conf = op.getConf();
      List<Operator<? extends Serializable>> childOperators = op
          .getChildOperators();
      List<Operator<? extends Serializable>> parentOperators = op
          .getParentOperators();

      List<String> colLists = new ArrayList<String>();
      ArrayList<ExprNodeDesc> keys = conf.getKeyCols();
      for (ExprNodeDesc key : keys) {
        colLists = Utilities.mergeUniqElems(colLists, key.getCols());
      }

      if ((childOperators.size() == 1)
          && (childOperators.get(0) instanceof JoinOperator)) {
        assert parentOperators.size() == 1;
        Operator<? extends Serializable> par = parentOperators.get(0);
        JoinOperator childJoin = (JoinOperator) childOperators.get(0);
        RowResolver parRR = opToParseCtxMap.get(par).getRowResolver();
        List<String> childJoinCols = cppCtx.getJoinPrunedColLists().get(
            childJoin).get((byte) conf.getTag());
        boolean[] flags = new boolean[conf.getValueCols().size()];
        for (int i = 0; i < flags.length; i++) {
          flags[i] = false;
        }
        if (childJoinCols != null && childJoinCols.size() > 0) {
          Map<String, ExprNodeDesc> exprMap = op.getColumnExprMap();
          for (String childCol : childJoinCols) {
            ExprNodeDesc desc = exprMap.get(childCol);
            int index = conf.getValueCols().indexOf(desc);
            flags[index] = true;
            String[] nm = redSinkRR.reverseLookup(childCol);
            if (nm != null) {
              ColumnInfo cInfo = parRR.get(nm[0], nm[1]);
              if (!colLists.contains(cInfo.getInternalName())) {
                colLists.add(cInfo.getInternalName());
              }
            }
          }
        }
        Collections.sort(colLists);
        pruneReduceSinkOperator(flags, op, cppCtx);
      } else {
        // Reduce Sink contains the columns needed - no need to aggregate from
        // children
        ArrayList<ExprNodeDesc> vals = conf.getValueCols();
        for (ExprNodeDesc val : vals) {
          colLists = Utilities.mergeUniqElems(colLists, val.getCols());
        }
      }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc


    // get the join keys from old parent ReduceSink operators
    for (pos = 0; pos < newParentOps.size(); pos++) {
      ReduceSinkOperator oldPar = (ReduceSinkOperator) oldReduceSinkParentOps.get(pos);
      ReduceSinkDesc rsconf = oldPar.getConf();
      Byte tag = (byte) rsconf.getTag();
      List<ExprNodeDesc> keys = rsconf.getKeyCols();
      keyExprMap.put(tag, keys);

      // set column transfer
      HashMap<String, ExprNodeDesc> map = (HashMap<String, ExprNodeDesc>) oldPar.getColumnExprMap();
      columnTransfer.put(tag, map);
View Full Code Here
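
columnTransfer keeps, per tag, the parent ReduceSink's column-to-expression map, so resolving a column on a given join side is a two-level lookup. A hypothetical helper (not part of the snippet) showing the intended use:

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

final class ColumnTransferSketch {
  // Hypothetical: look up the expression that produced `column` on the
  // join side identified by `tag`.
  static ExprNodeDesc resolve(
      Map<Byte, HashMap<String, ExprNodeDesc>> columnTransfer,
      byte tag, String column) {
    HashMap<String, ExprNodeDesc> side = columnTransfer.get(tag);
    return side == null ? null : side.get(column);
  }
}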

Examples of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc

    // keys[i] -> ArrayList<exprNodeDesc> for the i-th join operator key list
    ArrayList<ArrayList<ExprNodeDesc>> keys = new ArrayList<ArrayList<ExprNodeDesc>>();
    int keyLength = 0;
    for (int i = 0; i < right.length; i++) {
      Operator oi = (i == 0 && right[i] == null ? left : right[i]);
      ReduceSinkDesc now = ((ReduceSinkOperator) (oi)).getConf();
      if (i == 0) {
        keyLength = now.getKeyCols().size();
      } else {
        assert (keyLength == now.getKeyCols().size());
      }
      keys.add(now.getKeyCols());
    }
    // implicit type conversion hierarchy
    for (int k = 0; k < keyLength; k++) {
      // Find the common class for type conversion
      TypeInfo commonType = keys.get(0).get(k).getTypeInfo();
      for (int i = 1; i < right.length; i++) {
        TypeInfo a = commonType;
        TypeInfo b = keys.get(i).get(k).getTypeInfo();
        commonType = FunctionRegistry.getCommonClassForComparison(a, b);
        if (commonType == null) {
          throw new SemanticException(
              "Cannot do equality join on different types: " + a.getTypeName()
              + " and " + b.getTypeName());
        }
      }
      // Add implicit type conversion if necessary
      for (int i = 0; i < right.length; i++) {
        if (!commonType.equals(keys.get(i).get(k).getTypeInfo())) {
          keys.get(i).set(
              k,
              TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc(
              commonType.getTypeName(), keys.get(i).get(k)));
        }
      }
    }
    // regenerate keySerializationInfo because the ReduceSinkOperator's
    // output key types might have changed.
    for (int i = 0; i < right.length; i++) {
      Operator oi = (i == 0 && right[i] == null ? left : right[i]);
      ReduceSinkDesc now = ((ReduceSinkOperator) (oi)).getConf();

      now.setKeySerializeInfo(PlanUtils.getReduceKeyTableDesc(PlanUtils
          .getFieldSchemasFromColumnList(now.getKeyCols(), "joinkey"), now
          .getOrder()));
    }
  }
View Full Code Here