Examples of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator

org.apache.hadoop.hive.ql.exec.ReduceSinkOperator
Reduce Sink Operator sends output to the reduce stage

        reduceSinkOutputRowResolver.putExpression(entry.getValue(),
            new ColumnInfo(field, type, null, false));
      }
    }


    ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(
        OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys,
        grpByExprs.size(), reduceValues, distinctColIndices,
        outputKeyColumnNames, outputValueColumnNames, true, -1, numPartitionFields,
        numReducers), new RowSchema(reduceSinkOutputRowResolver
        .getColumnInfos()), inputOperatorInfo), reduceSinkOutputRowResolver);
    rsOp.setColumnExprMap(colExprMap);
    return rsOp;
  }

View Full Code Here

    for (Operator<? extends OperatorDesc>parentOp : joinOp.getParentOperators()) {
      if (parentOp.getOpTraits().getNumBuckets() > 0) {
        numBuckets = (numBuckets < parentOp.getOpTraits().getNumBuckets()) ? 
            parentOp.getOpTraits().getNumBuckets() : numBuckets; 
      }
      ReduceSinkOperator rs = (ReduceSinkOperator)parentOp;
      estimatedBuckets = (estimatedBuckets < rs.getConf().getNumReducers()) ? 
          rs.getConf().getNumReducers() : estimatedBuckets;
    }


    if (numBuckets <= 0) {
      numBuckets = estimatedBuckets;
      if (numBuckets <= 0) {

View Full Code Here

      LOG.info("Operator is " + joinOp.getParentOperators().get(0).getName() +
          ". Cannot convert to bucket map join");
      return false;
    }


    ReduceSinkOperator rs = (ReduceSinkOperator) joinOp.getParentOperators().get(bigTablePosition);
    /*
     * this is the case when the big table is a sub-query and is probably
     * already bucketed by the join column in say a group by operation 
     */
    List<List<String>> colNames = rs.getParentOperators().get(0).getOpTraits().getBucketColNames();
    if ((colNames != null) && (colNames.isEmpty() == false)) {
      Operator<? extends OperatorDesc>parentOfParent = rs.getParentOperators().get(0);
      for (List<String>listBucketCols : parentOfParent.getOpTraits().getBucketColNames()) {
        // can happen if this operator does not carry forward the previous bucketing columns
        // for e.g. another join operator which does not carry one of the sides' key columns
        if (listBucketCols.isEmpty()) {
          continue;
        }
        int colCount = 0;
        // parent op is guaranteed to have a single list because it is a reduce sink
        for (String colName : rs.getOpTraits().getBucketColNames().get(0)) {
          // all columns need to be at least a subset of the parentOfParent's bucket cols
          ExprNodeDesc exprNodeDesc = rs.getColumnExprMap().get(colName);
          if (exprNodeDesc instanceof ExprNodeColumnDesc) {
            if (((ExprNodeColumnDesc)exprNodeDesc).getColumn().equals(listBucketCols.get(colCount))) {
              colCount++;
            } else {
              break;
            }
          }
          
          if (colCount == rs.getOpTraits().getBucketColNames().get(0).size()) {
            // all keys matched.
            int numBuckets = parentOfParent.getOpTraits().getNumBuckets();
            boolean isSubQuery = false;
            if (numBuckets < 0) {
              isSubQuery = true;
              numBuckets = rs.getConf().getNumReducers();
            }
            tezBucketJoinProcCtx.setNumBuckets(numBuckets);
            tezBucketJoinProcCtx.setIsSubQuery(isSubQuery);
            return true;
          }

View Full Code Here

      }
      ReduceSinkDesc rsConf = getReduceSinkDesc(partitionPositions, sortPositions, sortOrder,
          newValueCols, bucketColumns, numBuckets, fsParent);


      // Create ReduceSink operator
      ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(
          OperatorFactory.getAndMakeChild(rsConf, new RowSchema(outRR.getColumnInfos()), fsParent),
          outRR, parseCtx);
      rsOp.setColumnExprMap(colExprMap);


      // Create ExtractDesc
      ObjectPair<String, RowResolver> exPair = copyRowResolver(outRR);
      RowResolver exRR = exPair.getSecond();
      ExtractDesc exConf = new ExtractDesc(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
          Utilities.ReduceField.VALUE.toString(), "", false));


      // Create Extract Operator
      ExtractOperator exOp = (ExtractOperator) putOpInsertMap(
          OperatorFactory.getAndMakeChild(exConf, new RowSchema(exRR.getColumnInfos()), rsOp),
          exRR, parseCtx);


      // link EX to FS
      fsOp.getParentOperators().clear();
      fsOp.getParentOperators().add(exOp);
      exOp.getChildOperators().add(fsOp);


      // Set if partition sorted or partition bucket sorted
      fsOp.getConf().setDpSortState(FileSinkDesc.DPSortState.PARTITION_SORTED);
      if (bucketColumns.size() > 0) {
        fsOp.getConf().setDpSortState(FileSinkDesc.DPSortState.PARTITION_BUCKET_SORTED);
      }


      // update partition column info in FS descriptor
      ArrayList<ExprNodeDesc> partitionColumns = getPositionsToExprNodes(partitionPositions, rsOp
          .getSchema().getSignature());
      fsOp.getConf().setPartitionCols(partitionColumns);


      LOG.info("Inserted " + rsOp.getOperatorId() + " and " + exOp.getOperatorId()
          + " as parent of " + fsOp.getOperatorId() + " and child of " + fsParent.getOperatorId());
      return null;
    }

View Full Code Here

  /**
   * The Node Processor for Column Pruning on Reduce Sink Operators.
   */
  public static class ColumnPrunerReduceSinkProc implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException {
      ReduceSinkOperator op = (ReduceSinkOperator)nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx)ctx;
      HashMap<Operator<? extends Serializable>, OpParseContext> opToParseCtxMap = 
          cppCtx.getOpToParseCtxMap();
      RowResolver redSinkRR = opToParseCtxMap.get(op).getRR();
      reduceSinkDesc conf = op.getConf();
      List<Operator<? extends Serializable>> childOperators = op.getChildOperators();
      List<Operator<? extends Serializable>> parentOperators = op.getParentOperators();


      List<String> colLists = new ArrayList<String>();
      ArrayList<exprNodeDesc> keys = conf.getKeyCols();
      for (exprNodeDesc key : keys)
        colLists = Utilities.mergeUniqElems(colLists, key.getCols());


      if ((childOperators.size() == 1) && (childOperators.get(0) instanceof JoinOperator)) {
        assert parentOperators.size() == 1;
        Operator<? extends Serializable> par = parentOperators.get(0);
        JoinOperator childJoin = (JoinOperator)childOperators.get(0);
        RowResolver parRR = opToParseCtxMap.get(par).getRR();
        List<String> childJoinCols = cppCtx.getJoinPrunedColLists().get(childJoin).get((byte)conf.getTag());
        boolean[] flags = new boolean[conf.getValueCols().size()];
        for (int i = 0; i < flags.length; i++)
          flags[i] = false;
        if (childJoinCols != null && childJoinCols.size() > 0) {
          Map<String,exprNodeDesc> exprMap = op.getColumnExprMap();
          for (String childCol : childJoinCols) {
            exprNodeDesc desc = exprMap.get(childCol);
            int index = conf.getValueCols().indexOf(desc);
            flags[index] = true;
            String[] nm = redSinkRR.reverseLookup(childCol);

View Full Code Here

      pos++;
    }


    //get the join keys from old parent ReduceSink operators
    for (pos = 0; pos < newParentOps.size(); pos++) {
      ReduceSinkOperator oldPar = (ReduceSinkOperator)oldReduceSinkParentOps.get(pos);
      reduceSinkDesc rsconf = oldPar.getConf();
      Byte tag = (byte)rsconf.getTag();
      List<exprNodeDesc> keys = rsconf.getKeyCols();
      keyExprMap.put(tag, keys);
    }

View Full Code Here

    for (int i = 0; i < valueCols.size(); i++)
      outputColumns.add(SemanticAnalyzer.getColumnInternalName(i));
    
    reduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(new ArrayList<exprNodeDesc>(), valueCols, 
                                                        outputColumns, false, -1, -1, -1); 
    ReduceSinkOperator rsOp = (ReduceSinkOperator)OperatorFactory.getAndMakeChild(rsDesc, fsRS, ts_op);
    mapredWork cplan = GenMapRedUtils.getMapRedWork();
    ParseContext parseCtx = ctx.getParseCtx();


    Task<? extends Serializable> mergeTask = TaskFactory.get(cplan, parseCtx.getConf());
    fileSinkDesc fsConf = fsOp.getConf();

View Full Code Here

  public static void setKeyAndValueDesc(mapredWork plan, Operator<? extends Serializable> topOp) {
    if (topOp == null)
      return;


    if (topOp instanceof ReduceSinkOperator) {
      ReduceSinkOperator rs = (ReduceSinkOperator)topOp;
      plan.setKeyDesc(rs.getConf().getKeySerializeInfo());
      int tag = Math.max(0, rs.getConf().getTag());
      List<tableDesc> tagToSchema = plan.getTagToValueDesc();
      while (tag + 1 > tagToSchema.size()) {
        tagToSchema.add(null);
      }
      tagToSchema.set(tag, rs.getConf().getValueSerializeInfo());
    } else {
      List<Operator<? extends Serializable>> children = topOp.getChildOperators();
      if (children != null) {
        for(Operator<? extends Serializable> op: children) {
          setKeyAndValueDesc(plan, op);

View Full Code Here

                                        new ColumnInfo(field,
                                                       type, null, false));
      }
    }


    ReduceSinkOperator rsOp = (ReduceSinkOperator)  putOpInsertMap(
      OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1, numPartitionFields,
                                                                  numReducers),
                                        new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()),
                                        inputOperatorInfo),
        reduceSinkOutputRowResolver
    );
    rsOp.setColumnExprMap(colExprMap);
    return rsOp;
  }

View Full Code Here

      reduceSinkOutputRowResolver2.put("", t.toStringTree(),
                                       new ColumnInfo(Utilities.ReduceField.VALUE.toString() + "." + col,
                                           typeInfo, "", false));
    }


    ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(
      OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1,
                                                                  numPartitionFields, numReducers),
                                        new RowSchema(reduceSinkOutputRowResolver2.getColumnInfos()),
                                        groupByOperatorInfo),
        reduceSinkOutputRowResolver2
    );


    rsOp.setColumnExprMap(colExprMap);
    return rsOp;
  }

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator

org.apache.hadoop.hive.ql.io.HiveKey

org.apache.hadoop.hive.ql.optimizer.BucketingSortingReduceSinkOptimizer$BucketSortReduceSinkProcessor

org.apache.hadoop.hive.ql.optimizer.ColumnPrunerProcFactory$ColumnPrunerReduceSinkProc

org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory$ConstantPropagateReduceSinkProc

org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin

org.apache.hadoop.hive.ql.optimizer.correlation.CorrelationOptimizer$CorrelationNodeProc

org.apache.hadoop.hive.ql.optimizer.correlation.QueryPlanTreeTransformation

org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkDeDuplication$AbsctractReducerReducerProc

org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkDeDuplication$GroupbyReducerProc

org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkDeDuplication$ReducerReducerProc

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.