Package org.apache.hadoop.hive.ql.exec

Examples of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator
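Most of the snippets below are Hive compiler hooks that implement the NodeProcessor interface: a graph walker visits each operator in the query plan and hands it to process(), which casts the Node to ReduceSinkOperator and inspects its ReduceSinkDesc configuration. A minimal, self-contained sketch of that shared pattern (the class name ExampleReduceSinkProc is hypothetical):

  import java.util.List;
  import java.util.Stack;

  import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
  import org.apache.hadoop.hive.ql.lib.Node;
  import org.apache.hadoop.hive.ql.lib.NodeProcessor;
  import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
  import org.apache.hadoop.hive.ql.parse.SemanticException;
  import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
  import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;

  public class ExampleReduceSinkProc implements NodeProcessor {
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      // The walker dispatches on operator type, so the cast is safe here.
      ReduceSinkOperator rsOp = (ReduceSinkOperator) nd;
      ReduceSinkDesc desc = rsOp.getConf();

      // Key columns determine how rows are partitioned and sorted on the
      // way to the reducers; value columns carry the remaining payload.
      List<ExprNodeDesc> keys = desc.getKeyCols();
      List<ExprNodeDesc> values = desc.getValueCols();

      // No node output to propagate to downstream processors.
      return null;
    }
  }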



    ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(
      OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1, reduceKeys.size(), -1),
                                      new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), input),
      reduceSinkOutputRowResolver);
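
    // Reading of the positional arguments to PlanUtils.getReduceSinkDesc,
    // assuming the common (keyCols, valueCols, outputColumnNames, includeKey,
    // tag, numPartitionFields, numReducers) overload - verify against the
    // Hive version at hand:
    //   true               - include the key columns in the output schema
    //   -1                 - tag (no tagged siblings for this sink)
    //   reduceKeys.size()  - partition on all of the key columns
    //   -1                 - let Hive choose the number of reducers at runtime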
   
    rsOp.setColumnExprMap(colExprMap);
    return rsOp;
  }
View Full Code Here


      LineageCtx lCtx = (LineageCtx) procCtx;
      JoinOperator op = (JoinOperator)nd;
      JoinDesc jd = op.getConf();

      // The input operator to the join is always a reduce sink operator
      ReduceSinkOperator inpOp = (ReduceSinkOperator)getParent(stack);
      ReduceSinkDesc rd = inpOp.getConf();
      int tag = rd.getTag();

      // Iterate over the outputs of the join operator and merge the
      // dependencies of the columns that correspond to the tag.
      int cnt = 0;
View Full Code Here
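
The getParent(stack) helper used above (and in the next example) is defined outside these excerpts; a plausible sketch, consistent with the stack.get(stack.size() - 2) idiom that appears verbatim in a later example on this page (treat the exact body as an assumption):

  // The walker's stack holds the path from the plan root to the current
  // node, so the element just below the top is the visited node's parent.
  protected static Operator<? extends Serializable> getParent(Stack<Node> stack) {
    return (Operator<? extends Serializable>) stack.get(stack.size() - 2);
  }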

      // This processor should never be called for leaf nodes.
      assert(!stack.isEmpty());

      // LineageCtx
      LineageCtx lCtx = (LineageCtx) procCtx;
      ReduceSinkOperator rop = (ReduceSinkOperator)nd;

      ArrayList<ColumnInfo> col_infos = rop.getSchema().getSignature();
      Operator<? extends Serializable> inpOp = getParent(stack);
      int cnt = 0;

      // The keys are included only if the reduce sink feeds into
      // a group-by operator through a chain of forward operators.
      Operator<? extends Serializable> op = rop.getChildOperators().get(0);
      while (op instanceof ForwardOperator) {
        op = op.getChildOperators().get(0);
      }

      if (op instanceof GroupByOperator) {
        for(ExprNodeDesc expr : rop.getConf().getKeyCols()) {
          lCtx.getIndex().putDependency(rop, col_infos.get(cnt++),
              ExprProcFactory.getExprDependency(lCtx, inpOp, expr));
        }
      }

      for(ExprNodeDesc expr : rop.getConf().getValueCols()) {
        lCtx.getIndex().putDependency(rop, col_infos.get(cnt++),
            ExprProcFactory.getExprDependency(lCtx, inpOp, expr));
      }

      return null;
View Full Code Here
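
The forward-chain walk above is compact enough to factor into a named predicate; a hypothetical refactoring sketch (the original code inlines this logic):

  // True when the reduce sink feeds a GroupByOperator, possibly through a
  // chain of ForwardOperators - exactly the case in which the key columns
  // contribute to lineage.
  private static boolean feedsGroupBy(ReduceSinkOperator rop) {
    Operator<? extends Serializable> op = rop.getChildOperators().get(0);
    while (op instanceof ForwardOperator) {
      op = op.getChildOperators().get(0);
    }
    return op instanceof GroupByOperator;
  }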


    // get the join keys from old parent ReduceSink operators
    for (pos = 0; pos < newParentOps.size(); pos++) {
      ReduceSinkOperator oldPar = (ReduceSinkOperator) oldReduceSinkParentOps.get(pos);
      ReduceSinkDesc rsconf = oldPar.getConf();
      Byte tag = (byte) rsconf.getTag();
      List<ExprNodeDesc> keys = rsconf.getKeyCols();
      keyExprMap.put(tag, keys);

      // set column transfer
      HashMap<String, ExprNodeDesc> map = (HashMap<String, ExprNodeDesc>) oldPar.getColumnExprMap();
      columnTransfer.put(tag, map);
    }

    // create the map-join operator
    for (pos = 0; pos < newParentOps.size(); pos++) {
View Full Code Here
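
The loop above fills keyExprMap and columnTransfer, whose declarations fall outside the excerpt; the shapes below are inferred from how the loop uses them (an assumption, not the verbatim declarations):

  // One entry per join input, keyed by the reduce sink's tag byte.
  Map<Byte, List<ExprNodeDesc>> keyExprMap =
      new HashMap<Byte, List<ExprNodeDesc>>();
  // Per-tag copy of each old reduce sink's column-to-expression mapping.
  Map<Byte, Map<String, ExprNodeDesc>> columnTransfer =
      new HashMap<Byte, Map<String, ExprNodeDesc>>();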

      @Override
      public Object process(Node nd, Stack<Node> stack,
          NodeProcessorCtx procCtx, Object... nodeOutputs)
          throws SemanticException {
        ReduceSinkDeduplicateProcCtx ctx = (ReduceSinkDeduplicateProcCtx) procCtx;
        ReduceSinkOperator childReduceSink = (ReduceSinkOperator)nd;
       
        if(ctx.contains(childReduceSink)) {
          return null;
        }
       
        ParseContext pGraphContext = ctx.getPctx();
        HashMap<String, String> childColumnMapping = getPartitionAndKeyColumnMapping(childReduceSink);
        ReduceSinkOperator parentRS = findSingleParentReduceSink(childReduceSink, pGraphContext);
        if (parentRS == null) {
          ctx.addRejectedReduceSinkOperator(childReduceSink);
          return null;
        }
        HashMap<String, String> parentColumnMapping = getPartitionAndKeyColumnMapping(parentRS);
        Operator<? extends Serializable> stopBacktrackFlagOp = null;
        if (parentRS.getParentOperators() == null
            || parentRS.getParentOperators().size() == 0) {
          stopBacktrackFlagOp = parentRS;
        } else if (parentRS.getParentOperators().size() != 1) {
          return null;
        } else {
          stopBacktrackFlagOp = parentRS.getParentOperators().get(0);
        }
       
        boolean succeed = backTrackColumnNames(childColumnMapping, childReduceSink, stopBacktrackFlagOp, pGraphContext);
        if (!succeed) {
          return null;
View Full Code Here
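
findSingleParentReduceSink is a deduplication helper defined elsewhere; a minimal sketch of the behavior its name and usage suggest (an assumption, not the actual Hive implementation - the real helper also takes the ParseContext):

  // Walk upward through single-parent operators until a ReduceSinkOperator
  // is found; give up at a branch or at the top of the plan.
  private static ReduceSinkOperator findSingleParentRS(
      Operator<? extends Serializable> start) {
    Operator<? extends Serializable> cur = start;
    while (cur.getParentOperators() != null
        && cur.getParentOperators().size() == 1) {
      cur = cur.getParentOperators().get(0);
      if (cur instanceof ReduceSinkOperator) {
        return (ReduceSinkOperator) cur;
      }
    }
    return null;
  }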

   * @param opProcCtx
   *          context
   */
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx,
      Object... nodeOutputs) throws SemanticException {
    ReduceSinkOperator op = (ReduceSinkOperator) nd;
    GenMRProcContext ctx = (GenMRProcContext) opProcCtx;

    ctx.getParseCtx();

    // the map-join consisted of a bunch of map-only jobs, and it has been
    // split after the map-join
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx
        .getMapCurrCtx();
    GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
    Task<? extends Serializable> currTask = mapredCtx.getCurrTask();
    MapredWork plan = (MapredWork) currTask.getWork();
    HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = ctx
        .getOpTaskMap();
    Task<? extends Serializable> opMapTask = opTaskMap.get(reducer);
View Full Code Here

   * @param opProcCtx
   *          context
   */
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx,
      Object... nodeOutputs) throws SemanticException {
    ReduceSinkOperator op = (ReduceSinkOperator) nd;
    GenMRProcContext ctx = (GenMRProcContext) opProcCtx;

    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx
        .getMapCurrCtx();
    GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
    Task<? extends Serializable> currTask = mapredCtx.getCurrTask();
    Operator<? extends Serializable> currTopOp = mapredCtx.getCurrTopOp();
    String currAliasId = mapredCtx.getCurrAliasId();
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
    Map<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = ctx
        .getOpTaskMap();
    Task<? extends Serializable> opMapTask = opTaskMap.get(reducer);

    ctx.setCurrTopOp(currTopOp);
View Full Code Here
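
This processor and the neighboring GenMRRedSink variants share one skeleton: find the reducer (the reduce sink's single child), check opTaskMap for a task that already owns it, and either start new map-reduce work or merge into the existing task. A sketch of that shape (branch bodies elided; the comments describe intent, not exact Hive helper calls):

  Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
  Task<? extends Serializable> opMapTask = ctx.getOpTaskMap().get(reducer);
  if (opMapTask == null) {
    // First visit to this reducer: initialize or split the current plan
    // so the reducer gets a map-reduce task of its own.
  } else {
    // The reducer already owns a task: merge this branch into it and make
    // that task the current one.
    ctx.setCurrTask(opMapTask);
  }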

   * The Node Processor for Column Pruning on Reduce Sink Operators.
   */
  public static class ColumnPrunerReduceSinkProc implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
        Object... nodeOutputs) throws SemanticException {
      ReduceSinkOperator op = (ReduceSinkOperator) nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      HashMap<Operator<? extends Serializable>, OpParseContext> opToParseCtxMap = cppCtx
          .getOpToParseCtxMap();
      RowResolver redSinkRR = opToParseCtxMap.get(op).getRowResolver();
      ReduceSinkDesc conf = op.getConf();
      List<Operator<? extends Serializable>> childOperators = op
          .getChildOperators();
      List<Operator<? extends Serializable>> parentOperators = op
          .getParentOperators();

      List<String> colLists = new ArrayList<String>();
      ArrayList<ExprNodeDesc> keys = conf.getKeyCols();
      for (ExprNodeDesc key : keys) {
        colLists = Utilities.mergeUniqElems(colLists, key.getCols());
      }

      if ((childOperators.size() == 1)
          && (childOperators.get(0) instanceof JoinOperator)) {
        assert parentOperators.size() == 1;
        Operator<? extends Serializable> par = parentOperators.get(0);
        JoinOperator childJoin = (JoinOperator) childOperators.get(0);
        RowResolver parRR = opToParseCtxMap.get(par).getRowResolver();
        List<String> childJoinCols = cppCtx.getJoinPrunedColLists().get(
            childJoin).get((byte) conf.getTag());
        // Java boolean arrays are zero-initialized, so no explicit fill to
        // false is needed.
        boolean[] flags = new boolean[conf.getValueCols().size()];
        if (childJoinCols != null && childJoinCols.size() > 0) {
          Map<String, ExprNodeDesc> exprMap = op.getColumnExprMap();
          for (String childCol : childJoinCols) {
            ExprNodeDesc desc = exprMap.get(childCol);
            int index = conf.getValueCols().indexOf(desc);
            flags[index] = true;
            String[] nm = redSinkRR.reverseLookup(childCol);
View Full Code Here
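
The excerpt breaks off just after the flags array is populated; what plausibly follows (an assumption based on the flags idiom, not the verbatim continuation) is dropping the value columns whose flag stayed false:

  // Keep only the value columns that a downstream join actually reads.
  ArrayList<ExprNodeDesc> prunedValues = new ArrayList<ExprNodeDesc>();
  for (int i = 0; i < flags.length; i++) {
    if (flags[i]) {
      prunedValues.add(conf.getValueCols().get(i));
    }
  }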

   * @param opProcCtx
   *          context
   */
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx,
      Object... nodeOutputs) throws SemanticException {
    ReduceSinkOperator op = (ReduceSinkOperator) nd;
    GenMRProcContext ctx = (GenMRProcContext) opProcCtx;

    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx
        .getMapCurrCtx();
    GenMapRedCtx mapredCtx = mapCurrCtx.get(stack.get(stack.size() - 2));
    Task<? extends Serializable> currTask = mapredCtx.getCurrTask();
    MapredWork currPlan = (MapredWork) currTask.getWork();
    Operator<? extends Serializable> currTopOp = mapredCtx.getCurrTopOp();
    String currAliasId = mapredCtx.getCurrAliasId();
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
    HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = ctx
        .getOpTaskMap();
    Task<? extends Serializable> opMapTask = opTaskMap.get(reducer);

    ctx.setCurrTopOp(currTopOp);
View Full Code Here

   * @param opProcCtx
   *          context
   */
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx,
      Object... nodeOutputs) throws SemanticException {
    ReduceSinkOperator op = (ReduceSinkOperator) nd;
    GenMRProcContext ctx = (GenMRProcContext) opProcCtx;

    ParseContext parseCtx = ctx.getParseCtx();
    UnionProcContext uCtx = parseCtx.getUCtx();

    // union was map only - no special processing needed
    if (uCtx.isMapOnlySubq()) {
      return (new GenMRRedSink1()).process(nd, stack, opProcCtx, nodeOutputs);
    }

    // the union consisted of a bunch of map-reduce jobs, and it has been
    // split at the union
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx
        .getMapCurrCtx();
    GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
    Task<? extends Serializable> currTask = mapredCtx.getCurrTask();
    MapredWork plan = (MapredWork) currTask.getWork();
    HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = ctx
        .getOpTaskMap();
    Task<? extends Serializable> opMapTask = opTaskMap.get(reducer);
View Full Code Here
