Package org.apache.hadoop.hive.ql.exec

Examples of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator


      // Cartesian product is not supported in strict mode
      if (conf.getVar(HiveConf.ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("strict"))
        throw new SemanticException(ErrorMsg.NO_CARTESIAN_PRODUCT.getMsg());
    }

    ReduceSinkOperator rsOp = (ReduceSinkOperator)putOpInsertMap(
      OperatorFactory.getAndMakeChild(
        PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumns, false, joinTree.getNextTag(), reduceKeys.size(), numReds),
        new RowSchema(outputRS.getColumnInfos()),
        child), outputRS);
    rsOp.setColumnExprMap(colExprMap);
    return rsOp;
  }
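
A quick way to see the strict-mode guard fire is to flip hive.mapred.mode through HiveConf. A minimal sketch (the assertion is illustrative, not part of the snippet above):

    // Enable strict mode so the cartesian-product guard above takes effect.
    HiveConf conf = new HiveConf();
    conf.setVar(HiveConf.ConfVars.HIVEMAPREDMODE, "strict");

    // Compiling a join with no join condition now reaches the guard and
    // fails with ErrorMsg.NO_CARTESIAN_PRODUCT instead of launching a job.
    assert conf.getVar(HiveConf.ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("strict");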


    // A semijoin has no aggregations and the reduce values are never used,
    // so leave them as an empty list.
    ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
    int numPartitionFields = fields.size();

    // finally generate the ReduceSink operator
    ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(
        OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1, numPartitionFields, -1),
                                        new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()),
                                        inputOperatorInfo),
        reduceSinkOutputRowResolver);
    rsOp.setColumnExprMap(colExprMap);

    return rsOp;
  }
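
For reference, a re-annotated sketch of the getReduceSinkDesc call above; the comments are an interpretation of the positional arguments, hedged from the call sites on this page, and the rsDesc local is just for illustration:

    ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(
        reduceKeys,          // partition/sort key expressions
        reduceValues,        // empty here: a semijoin ships no values
        outputColumnNames,   // names for the emitted columns
        true,                // include the keys in the output row
        -1,                  // tag: -1 means no tag byte (single reduce source)
        numPartitionFields,  // partition on all of the key fields
        -1);                 // numReducers: -1 lets Hive decide at runtime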

          }
        }
      }
    }

    ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(
      OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1, reduceKeys.size(), -1),
                                      new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), input),
      reduceSinkOutputRowResolver);

    rsOp.setColumnExprMap(colExprMap);
    return rsOp;
  }

      assert(parentOps.size() == joinTree.getBaseSrc().length);
      int pos = 0;
      for (String src : joinTree.getBaseSrc()) {
        if (src != null) {
          assert(parentOps.get(pos) instanceof ReduceSinkOperator);
          ReduceSinkOperator reduceSinkOp = (ReduceSinkOperator) parentOps.get(pos);

          // Get the key column names, and check if the keys are all constants
          // or columns (not expressions). If yes, proceed.
          List<String> keyColNames =
              TableAccessAnalyzer.getKeyColNames(reduceSinkOp.getConf().getKeyCols());

          if (keyColNames == null) {
            // we are done, since there are no keys to check for
            return null;
          }

          // Walk the operator tree to the TableScan and build the mapping
          // along the way for the columns that the group by uses as keys
          TableScanOperator tso = TableAccessAnalyzer.genRootTableScan(
              reduceSinkOp.getParentOperators().get(0), keyColNames);

          if (tso == null) {
            // Could not find an allowed path to a table scan operator,
            // hence we are done
            return null;
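
A hedged illustration of what getKeyColNames accepts: keys that are plain columns or constants pass, while any function-wrapped key makes it return null, at which point the analysis above gives up:

    // Illustrative only; assumes Hive's ExprNodeDesc hierarchy.
    List<ExprNodeDesc> simpleKeys = Arrays.<ExprNodeDesc>asList(
        new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "key", "t", false),
        new ExprNodeConstantDesc(1));   // plain columns and constants are fine
    // TableAccessAnalyzer.getKeyColNames(simpleKeys) returns the plain column
    // names (here "key"); a function-wrapped key such as upper(key), i.e. an
    // ExprNodeGenericFuncDesc, makes it return null and the caller bails out.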

    Map<ReduceSinkOperator, ExprNodeDesc> newFilters = context.getNewfilters();

    // insert new filter between RS and parent of RS
    for (Map.Entry<ReduceSinkOperator, ExprNodeDesc> entry : newFilters.entrySet()) {
      ReduceSinkOperator reducer = entry.getKey();
      Operator<?> parent = reducer.getParentOperators().get(0);

      ExprNodeDesc expr = entry.getValue();
      if (parent instanceof FilterOperator) {
        ExprNodeDesc prev = ((FilterOperator)parent).getConf().getPredicate();
        ExprNodeDesc merged = ExprNodeDescUtils.mergePredicates(prev, expr);
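
When the ReduceSink's parent is already a FilterOperator, the new predicate is ANDed into the existing one instead of stacking a second filter on top. A minimal sketch of that branch (FilterDesc#setPredicate is assumed here):

    if (parent instanceof FilterOperator) {
      FilterDesc desc = ((FilterOperator) parent).getConf();
      // mergePredicates combines the two predicates with AND
      desc.setPredicate(ExprNodeDescUtils.mergePredicates(desc.getPredicate(), expr));
    }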

    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      @SuppressWarnings("unchecked")
      CommonJoinOperator<JoinDesc> join = (CommonJoinOperator) nd;
      ReduceSinkOperator source = (ReduceSinkOperator) stack.get(stack.size() - 2);
      FilterOperator filter = (FilterOperator) stack.get(stack.size() - 3);
      int srcPos = join.getParentOperators().indexOf(source);

      TransitiveContext context = (TransitiveContext) procCtx;
      Map<CommonJoinOperator, int[][]> filterPropagates = context.getFilterPropates();
      Map<ReduceSinkOperator, ExprNodeDesc> newFilters = context.getNewfilters();

      int[][] targets = filterPropagates.get(join);
      if (targets == null) {
        filterPropagates.put(join, targets = getTargets(join));
      }

      List<Operator<? extends OperatorDesc>> parents = join.getParentOperators();
      for (int targetPos : targets[srcPos]) {
        ReduceSinkOperator target = (ReduceSinkOperator) parents.get(targetPos);
        List<ExprNodeDesc> sourceKeys = source.getConf().getKeyCols();
        List<ExprNodeDesc> targetKeys = target.getConf().getKeyCols();

        ExprNodeDesc predicate = filter.getConf().getPredicate();
        ExprNodeDesc replaced = ExprNodeDescUtils.replace(predicate, sourceKeys, targetKeys);
        if (replaced != null && !filterExists(target, replaced)) {
          ExprNodeDesc prev = newFilters.get(target);
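
The core idea above: on an equi-join, a filter on one input's join keys also holds for every other input's keys, so the predicate can be re-expressed in the other input's terms and pushed there. A hedged sketch with illustrative names:

    // e.g. with JOIN ... ON a.key = b.key and an existing filter a.key > 10:
    ExprNodeDesc predicate = filter.getConf().getPredicate();         // a.key > 10
    ExprNodeDesc rewritten =
        ExprNodeDescUtils.replace(predicate, sourceKeys, targetKeys); // b.key > 10
    if (rewritten != null && !filterExists(target, rewritten)) {
      // queue "b.key > 10" for insertion above the other ReduceSink
    }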

   * @param opProcCtx
   *          context
   */
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx,
      Object... nodeOutputs) throws SemanticException {
    ReduceSinkOperator op = (ReduceSinkOperator) nd;
    GenMRProcContext ctx = (GenMRProcContext) opProcCtx;

    Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx = ctx
        .getMapCurrCtx();
    GenMapRedCtx mapredCtx = mapCurrCtx.get(stack.get(stack.size() - 2));
    Task<? extends Serializable> currTask = mapredCtx.getCurrTask();
    MapredWork currPlan = (MapredWork) currTask.getWork();
    Operator<? extends OperatorDesc> currTopOp = mapredCtx.getCurrTopOp();
    String currAliasId = mapredCtx.getCurrAliasId();
    Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0);
    HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap = ctx
        .getOpTaskMap();
    Task<? extends Serializable> opMapTask = opTaskMap.get(reducer);

    ctx.setCurrTopOp(currTopOp);
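
A note on the stack.get(stack.size() - 2) pattern used here and above: during the rule-driven operator walk, the top of the stack is the node being processed, so size() - 2 is its immediate parent on the walked path:

    Node current = stack.get(stack.size() - 1);  // the ReduceSinkOperator itself
    Node parent  = stack.get(stack.size() - 2);  // the operator it was reached from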

   * The Node Processor for Column Pruning on Reduce Sink Operators.
   */
  public static class ColumnPrunerReduceSinkProc implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
        Object... nodeOutputs) throws SemanticException {
      ReduceSinkOperator op = (ReduceSinkOperator) nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      HashMap<Operator<? extends OperatorDesc>, OpParseContext> opToParseCtxMap = cppCtx
          .getOpToParseCtxMap();
      RowResolver redSinkRR = opToParseCtxMap.get(op).getRowResolver();
      ReduceSinkDesc conf = op.getConf();
      List<Operator<? extends OperatorDesc>> childOperators = op
          .getChildOperators();
      List<Operator<? extends OperatorDesc>> parentOperators = op
          .getParentOperators();

      List<String> colLists = new ArrayList<String>();
      ArrayList<ExprNodeDesc> keys = conf.getKeyCols();
      for (ExprNodeDesc key : keys) {
        colLists = Utilities.mergeUniqElems(colLists, key.getCols());
      }

      if ((childOperators.size() == 1)
          && (childOperators.get(0) instanceof JoinOperator)) {
        assert parentOperators.size() == 1;
        Operator<? extends OperatorDesc> par = parentOperators.get(0);
        JoinOperator childJoin = (JoinOperator) childOperators.get(0);
        RowResolver parRR = opToParseCtxMap.get(par).getRowResolver();
        List<String> childJoinCols = cppCtx.getJoinPrunedColLists().get(
            childJoin).get((byte) conf.getTag());
        boolean[] flags = new boolean[conf.getValueCols().size()];
        for (int i = 0; i < flags.length; i++) {
          flags[i] = false;
        }
        if (childJoinCols != null && childJoinCols.size() > 0) {
          Map<String, ExprNodeDesc> exprMap = op.getColumnExprMap();
          for (String childCol : childJoinCols) {
            ExprNodeDesc desc = exprMap.get(childCol);
            int index = conf.getValueCols().indexOf(desc);
            flags[index] = true;
            String[] nm = redSinkRR.reverseLookup(childCol);
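
The flags array above records which ReduceSink value columns the child join still consumes; anything left false can be pruned. A condensed sketch, with a defensive index check added (the original assumes the lookup always succeeds):

    boolean[] flags = new boolean[conf.getValueCols().size()];  // all false
    for (String childCol : childJoinCols) {
      ExprNodeDesc desc = op.getColumnExprMap().get(childCol);
      int index = conf.getValueCols().indexOf(desc);
      if (index >= 0) {
        flags[index] = true;  // this value column is still needed downstream
      }
    }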

      @Override
      public Object process(Node nd, Stack<Node> stack,
          NodeProcessorCtx procCtx, Object... nodeOutputs)
          throws SemanticException {
        ReduceSinkDeduplicateProcCtx ctx = (ReduceSinkDeduplicateProcCtx) procCtx;
        ReduceSinkOperator childReduceSink = (ReduceSinkOperator)nd;

        if(ctx.contains(childReduceSink)) {
          return null;
        }

        List<Operator<? extends OperatorDesc>> childOp =
          childReduceSink.getChildOperators();
        if (childOp != null && childOp.size() == 1) {
          Operator<? extends OperatorDesc> child = childOp.get(0);
          if (child instanceof GroupByOperator || child instanceof JoinOperator) {
            ctx.addRejectedReduceSinkOperator(childReduceSink);
            return null;
          }
        }

        ParseContext pGraphContext = ctx.getPctx();
        HashMap<String, String> childColumnMapping =
          getPartitionAndKeyColumnMapping(childReduceSink);
        ReduceSinkOperator parentRS = null;
        parentRS = findSingleParentReduceSink(childReduceSink, pGraphContext);
        if (parentRS == null) {
          ctx.addRejectedReduceSinkOperator(childReduceSink);
          return null;
        }
        HashMap<String, String> parentColumnMapping = getPartitionAndKeyColumnMapping(parentRS);
        Operator<? extends OperatorDesc> stopBacktrackFlagOp = null;
        if (parentRS.getParentOperators() == null
            || parentRS.getParentOperators().size() == 0) {
          stopBacktrackFlagOp =  parentRS;
        } else if (parentRS.getParentOperators().size() != 1) {
          return null;
        } else {
          stopBacktrackFlagOp = parentRS.getParentOperators().get(0);
        }

        boolean succeed = backTrackColumnNames(childColumnMapping, childReduceSink, stopBacktrackFlagOp, pGraphContext);
        if (!succeed) {
          return null;
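
Condensed, the acceptance test above: the child ReduceSink is folded into its parent only when a single parent ReduceSink exists and the child's partition/key columns can be back-tracked onto it. A hedged restatement using the snippet's own helpers:

    // (1) there must be exactly one candidate parent ReduceSink
    ReduceSinkOperator parentRS = findSingleParentReduceSink(childReduceSink, pGraphContext);
    if (parentRS == null) {
      ctx.addRejectedReduceSinkOperator(childReduceSink);  // no candidate parent
      return null;
    }
    // (2) the child's partition/key columns must back-track to the parent
    if (!backTrackColumnNames(childColumnMapping, childReduceSink,
                              stopBacktrackFlagOp, pGraphContext)) {
      return null;  // column mapping broken along the way: give up
    }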

   * @param opProcCtx
   *          context
   */
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx,
      Object... nodeOutputs) throws SemanticException {
    ReduceSinkOperator op = (ReduceSinkOperator) nd;
    GenMRProcContext ctx = (GenMRProcContext) opProcCtx;

    ctx.getParseCtx();

    // A map-join consists of a bunch of map-only jobs, and the plan has been
    // split after the map-join.
    Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0);
    Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx = ctx
        .getMapCurrCtx();
    GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
    Task<? extends Serializable> currTask = mapredCtx.getCurrTask();
    MapredWork plan = (MapredWork) currTask.getWork();
    HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap = ctx
        .getOpTaskMap();
    Task<? extends Serializable> opMapTask = opTaskMap.get(reducer);
