Examples of ReduceWork
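ReduceWork (org.apache.hadoop.hive.ql.plan.ReduceWork) describes the reduce side of a Hive task: the key and per-tag value wire formats, the number of reduce tasks, and the root operator of the reduce-side tree. The excerpts below all share the same wiring pattern, distilled here as a minimal sketch (not taken from any one excerpt; it assumes Hive's plan API as used below):

import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.ReduceWork;

public class ReduceWorkSketch {
  // rs is the map-side reduce sink; reducer is the root of the reduce-side tree
  static void wireReduceWork(MapredWork mr, Operator<ReduceSinkDesc> rs,
      Operator<?> reducer) {
    ReduceWork rWork = new ReduceWork();
    rWork.setNumReduceTasks(1);
    // the reduce side must deserialize exactly what the map-side sink serializes
    rWork.setKeyDesc(rs.getConf().getKeySerializeInfo());
    rWork.getTagToValueDesc().add(rs.getConf().getValueSerializeInfo());
    rWork.setReducer(reducer);
    mr.setReduceWork(rWork);
  }
}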


Examples of org.apache.hadoop.hive.ql.plan.ReduceWork
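This excerpt (from a hand-built test MapReduce plan, by the look of the helpers) opens mid-statement: the visible arguments are the tail of the ReduceSinkDesc construction for the map-side sink op1. The ReduceWork mirrors op1's key/value serialization; on the reduce side, an extract of the VALUE field feeds a filter and then a file sink writing mapredplan2.out.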

        Utilities
        .makeList(getStringColumn("key"), getStringColumn("value")),
        outputColumns, false, -1, 1, -1));

    addMapWork(mr, src, "a", op1);
    ReduceWork rWork = new ReduceWork();
    rWork.setNumReduceTasks(Integer.valueOf(1));
    rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
    rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
    mr.setReduceWork(rWork);

    // reduce side work
    Operator<FileSinkDesc> op4 = OperatorFactory.get(new FileSinkDesc(new Path(tmpdir + File.separator
        + "mapredplan2.out"), Utilities.defaultTd, false));

    Operator<FilterDesc> op3 = OperatorFactory.get(getTestFilterDesc("0"), op4);

    Operator<ExtractDesc> op2 = OperatorFactory.get(new ExtractDesc(
        getStringColumn(Utilities.ReduceField.VALUE.toString())), op3);

    rWork.setReducer(op2);
  }

Examples of org.apache.hadoop.hive.ql.plan.ReduceWork
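A two-input variant: this excerpt shows the second of two tagged map-side sinks being added (the first, op1, is elided). Because two inputs feed one reducer, needsTagging is set and a value descriptor is registered per tag; the reducer projects field "0" out of the VALUE column before the file sink.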

        .getReduceSinkDesc(Utilities.makeList(getStringColumn("key")),
        Utilities.makeList(getStringColumn("key")), outputColumns, true,
        Byte.valueOf((byte) 1), Integer.MAX_VALUE, -1));

    addMapWork(mr, src2, "b", op2);
    ReduceWork rWork = new ReduceWork();
    rWork.setNumReduceTasks(Integer.valueOf(5));
    rWork.setNeedsTagging(true);
    rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
    rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());

    rWork.getTagToValueDesc().add(op2.getConf().getValueSerializeInfo());
    mr.setReduceWork(rWork);

    // reduce side work
    Operator<FileSinkDesc> op4 = OperatorFactory.get(new FileSinkDesc(new Path(tmpdir + File.separator
        + "mapredplan3.out"), Utilities.defaultTd, false));

    Operator<SelectDesc> op5 = OperatorFactory.get(new SelectDesc(Utilities
        .makeList(new ExprNodeFieldDesc(TypeInfoFactory.stringTypeInfo,
        new ExprNodeColumnDesc(TypeInfoFactory.getListTypeInfo(TypeInfoFactory.stringTypeInfo),
        Utilities.ReduceField.VALUE.toString(), "", false), "0", false)),
        Utilities.makeList(outputColumns.get(0))), op4);

    rWork.setReducer(op5);
  }

Examples of org.apache.hadoop.hive.ql.plan.ReduceWork
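Back to a single-input plan: a select over key and value feeds the map-side sink (op1, elided by the excerpt), whose key/value descriptors the ReduceWork copies; the reducer extracts VALUE straight into a file sink.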

    Operator<SelectDesc> op4 = OperatorFactory.get(new SelectDesc(Utilities
        .makeList(getStringColumn("key"), getStringColumn("value")),
        outputColumns), op0);

    addMapWork(mr, src, "a", op4);
    ReduceWork rWork = new ReduceWork();
    rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
    rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
    rWork.setNumReduceTasks(Integer.valueOf(1));
    mr.setReduceWork(rWork);

    // reduce side work
    Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(new Path(tmpdir + File.separator
        + "mapredplan4.out"), Utilities.defaultTd, false));

    Operator<ExtractDesc> op2 = OperatorFactory.get(new ExtractDesc(
        getStringColumn(Utilities.ReduceField.VALUE.toString())), op3);

    rWork.setReducer(op2);
  }

Examples of org.apache.hadoop.hive.ql.plan.ReduceWork
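The same wiring again, except the descriptors are taken from op0, the operator the select feeds, and the output is mapredplan5.out.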

    Operator<SelectDesc> op4 = OperatorFactory.get(new SelectDesc(Utilities
        .makeList(getStringColumn("key"), getStringColumn("value")),
        outputColumns), op0);

    addMapWork(mr, src, "a", op4);
    ReduceWork rWork = new ReduceWork();
    mr.setReduceWork(rWork);
    rWork.setNumReduceTasks(Integer.valueOf(1));
    rWork.setKeyDesc(op0.getConf().getKeySerializeInfo());
    rWork.getTagToValueDesc().add(op0.getConf().getValueSerializeInfo());

    // reduce side work
    Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(new Path(tmpdir + File.separator
        + "mapredplan5.out"), Utilities.defaultTd, false));

    Operator<ExtractDesc> op2 = OperatorFactory.get(new ExtractDesc(
        getStringColumn(Utilities.ReduceField.VALUE.toString())), op3);

    rWork.setReducer(op2);
  }

Examples of org.apache.hadoop.hive.ql.plan.ReduceWork
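Here the reduce side is a three-operator pipeline: extract VALUE (op5), filter (op2), file sink (op3).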

    Operator<SelectDesc> op4 = OperatorFactory.get(new SelectDesc(Utilities
        .makeList(getStringColumn("key"), getStringColumn("value")),
        outputColumns), op0);

    addMapWork(mr, src, "a", op4);
    ReduceWork rWork = new ReduceWork();
    mr.setReduceWork(rWork);
    rWork.setNumReduceTasks(Integer.valueOf(1));
    rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
    rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());

    // reduce side work
    Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(new Path(tmpdir + File.separator
        + "mapredplan6.out"), Utilities.defaultTd, false));

    Operator<FilterDesc> op2 = OperatorFactory.get(getTestFilterDesc("0"), op3);

    Operator<ExtractDesc> op5 = OperatorFactory.get(new ExtractDesc(
        getStringColumn(Utilities.ReduceField.VALUE.toString())), op2);

    rWork.setReducer(op5);
  }

Examples of org.apache.hadoop.hive.ql.plan.ReduceWork
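A unit-test fragment: two MapWork and two ReduceWork items are created and added to an empty TezWork DAG.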

        });

    work = new TezWork("");

    mws = new MapWork[] { new MapWork(), new MapWork()};
    rws = new ReduceWork[] { new ReduceWork(), new ReduceWork() };

    work.addAll(mws);
    work.addAll(rws);

    int i = 0;

Examples of org.apache.hadoop.hive.ql.plan.ReduceWork
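Planner code: when walking onto a ReduceSinkOperator, the following work must (per the assert) be a ReduceWork, or reach one through a MergeJoinWork or UnionWork. Once the target ReduceWork is resolved, the key/value descriptors are copied from the sink, the tag-to-input mapping is recorded, and, unless this sink is already connected, an edge is added, carrying auto-parallelism bounds when enabled.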

        // need to add this branch to the key + value info
        assert operator instanceof ReduceSinkOperator
            && ((followingWork instanceof ReduceWork) || (followingWork instanceof MergeJoinWork)
                || followingWork instanceof UnionWork);
        ReduceSinkOperator rs = (ReduceSinkOperator) operator;
        ReduceWork rWork = null;
        if (followingWork instanceof MergeJoinWork) {
          MergeJoinWork mergeJoinWork = (MergeJoinWork) followingWork;
          rWork = (ReduceWork) mergeJoinWork.getMainWork();
        } else if (followingWork instanceof UnionWork) {
          // this is only possible when merge work is followed by union work
          UnionWork unionWork = (UnionWork) followingWork;
          int index = getMergeIndex(tezWork, unionWork, rs);
          // guaranteed to be instance of MergeJoinWork if index is valid
          BaseWork baseWork = tezWork.getChildren(unionWork).get(index);
          if (baseWork instanceof MergeJoinWork) {
            MergeJoinWork mergeJoinWork = (MergeJoinWork) baseWork;
            // disconnect the connection to union work and connect to merge work
            followingWork = mergeJoinWork;
            rWork = (ReduceWork) mergeJoinWork.getMainWork();
          } else {
            throw new SemanticException("Unknown work type found: "
                + baseWork.getClass().getCanonicalName());
          }
        } else {
          rWork = (ReduceWork) followingWork;
        }
        GenMapRedUtils.setKeyAndValueDesc(rWork, rs);

        // remember which parent belongs to which tag
        int tag = rs.getConf().getTag();
        rWork.getTagToInput().put(tag == -1 ? 0 : tag, work.getName());

        // remember the output name of the reduce sink
        rs.getConf().setOutputName(rWork.getName());

        if (!context.connectedReduceSinks.contains(rs)) {
          // add dependency between the two work items
          TezEdgeProperty edgeProp;
          if (rWork.isAutoReduceParallelism()) {
            edgeProp =
                new TezEdgeProperty(context.conf, EdgeType.SIMPLE_EDGE, true,
                    rWork.getMinReduceTasks(), rWork.getMaxReduceTasks(), bytesPerReducer);
          } else {
            edgeProp = new TezEdgeProperty(EdgeType.SIMPLE_EDGE);
          }
          tezWork.connect(work, followingWork, edgeProp);
          context.connectedReduceSinks.add(rs);

Examples of org.apache.hadoop.hive.ql.plan.ReduceWork
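Creating a fresh ReduceWork for a Tez vertex: the walked operator becomes the reducer, the task count comes from the parent ReduceSinkOperator, and when auto reduce parallelism applies, min/max task bounds are derived from the configured partition factors and capped by the reducer limit.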

    float maxPartitionFactor =
        context.conf.getFloatVar(HiveConf.ConfVars.TEZ_MAX_PARTITION_FACTOR);
    float minPartitionFactor = context.conf.getFloatVar(HiveConf.ConfVars.TEZ_MIN_PARTITION_FACTOR);
    long bytesPerReducer = context.conf.getLongVar(HiveConf.ConfVars.BYTESPERREDUCER);

    ReduceWork reduceWork = new ReduceWork("Reducer " + (++sequenceNumber));
    LOG.debug("Adding reduce work (" + reduceWork.getName() + ") for " + root);
    reduceWork.setReducer(root);
    reduceWork.setNeedsTagging(GenMapRedUtils.needsTagging(reduceWork));

    // All parents should be reduce sinks. We pick the one we just walked
    // to choose the number of reducers. In the join/union case they will
    // all be -1. In sort/order case where it matters there will be only
    // one parent.
    assert context.parentOfRoot instanceof ReduceSinkOperator;
    ReduceSinkOperator reduceSink = (ReduceSinkOperator) context.parentOfRoot;

    reduceWork.setNumReduceTasks(reduceSink.getConf().getNumReducers());

    if (isAutoReduceParallelism && reduceSink.getConf().getReducerTraits().contains(AUTOPARALLEL)) {
      reduceWork.setAutoReduceParallelism(true);

      // configured limit for reducers
      int maxReducers = context.conf.getIntVar(HiveConf.ConfVars.MAXREDUCERS);

      // min we allow tez to pick
      int minPartition = Math.max(1, (int) (reduceSink.getConf().getNumReducers()
        * minPartitionFactor));
      minPartition = (minPartition > maxReducers) ? maxReducers : minPartition;

      // max we allow tez to pick
      int maxPartition = (int) (reduceSink.getConf().getNumReducers() * maxPartitionFactor);
      maxPartition = (maxPartition > maxReducers) ? maxReducers : maxPartition;

      reduceWork.setMinReduceTasks(minPartition);
      reduceWork.setMaxReduceTasks(maxPartition);
    }

    setupReduceSink(context, reduceWork, reduceSink);

    tezWork.add(reduceWork);

    TezEdgeProperty edgeProp;
    if (reduceWork.isAutoReduceParallelism()) {
      edgeProp =
          new TezEdgeProperty(context.conf, EdgeType.SIMPLE_EDGE, true,
              reduceWork.getMinReduceTasks(), reduceWork.getMaxReduceTasks(), bytesPerReducer);
    } else {
      edgeProp = new TezEdgeProperty(EdgeType.SIMPLE_EDGE);
    }

    tezWork.connect(
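To make the auto-parallelism bounds concrete, here is the same arithmetic as a standalone sketch. The inputs are hypothetical, using what are believed to be the stock defaults for hive.tez.min.partition.factor (0.25), hive.tez.max.partition.factor (2.0), and hive.exec.reducers.max (1009):

public class AutoParallelismBounds {
  public static void main(String[] args) {
    int numReducers = 10;     // hypothetical value from the ReduceSinkDesc
    float minFactor = 0.25f;  // hive.tez.min.partition.factor (default)
    float maxFactor = 2.0f;   // hive.tez.max.partition.factor (default)
    int maxReducers = 1009;   // hive.exec.reducers.max (default)

    int minPartition = Math.max(1, (int) (numReducers * minFactor));
    minPartition = Math.min(minPartition, maxReducers);

    int maxPartition = Math.min((int) (numReducers * maxFactor), maxReducers);

    // prints min=2 max=20: Tez may pick any reducer count in that range
    System.out.println("min=" + minPartition + " max=" + maxPartition);
  }
}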

Examples of org.apache.hadoop.hive.ql.plan.ReduceWork
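The reduce-side runtime: the ReduceWork plan is fetched from the object cache, or deserialized from the job configuration on a miss, and the key deserializer plus one value deserializer per tag are initialized from its descriptors.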

      }
    }
    jc = job;

    ObjectCache cache = ObjectCacheFactory.getCache(jc);
    ReduceWork gWork = (ReduceWork) cache.retrieve(PLAN_KEY);
    if (gWork == null) {
      gWork = Utilities.getReduceWork(job);
      cache.cache(PLAN_KEY, gWork);
    } else {
      Utilities.setReduceWork(job, gWork);
    }

    reducer = gWork.getReducer();
    reducer.setParentOperators(null); // clear out any parents; the reducer is the root
    isTagged = gWork.getNeedsTagging();
    try {
      keyTableDesc = gWork.getKeyDesc();
      inputKeyDeserializer = ReflectionUtils.newInstance(keyTableDesc
          .getDeserializerClass(), null);
      SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
      keyObjectInspector = inputKeyDeserializer.getObjectInspector();
      valueTableDesc = new TableDesc[gWork.getTagToValueDesc().size()];
      for (int tag = 0; tag < gWork.getTagToValueDesc().size(); tag++) {
        // We should initialize the SerDe with the TypeInfo when available.
        valueTableDesc[tag] = gWork.getTagToValueDesc().get(tag);
        inputValueDeserializer[tag] = ReflectionUtils.newInstance(
            valueTableDesc[tag].getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(inputValueDeserializer[tag], null,
                                   valueTableDesc[tag].getProperties(), null);
        valueObjectInspector[tag] = inputValueDeserializer[tag]

Examples of org.apache.hadoop.hive.ql.plan.ReduceWork
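Finally, how the reducer count is settled at execution time: a non-negative compile-time value wins; failing that, the jobconf value; failing that, an estimate from the input data size.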

  /**
   * Set the number of reducers for the mapred work.
   */
  private void setNumberOfReducers() throws IOException {
    ReduceWork rWork = work.getReduceWork();
    // this is a temporary hack to fix things that are not fixed in the compiler
    Integer numReducersFromWork = rWork == null ? 0 : rWork.getNumReduceTasks();

    if (rWork == null) {
      console
          .printInfo("Number of reduce tasks is set to 0 since there's no reduce operator");
    } else {
      if (numReducersFromWork >= 0) {
        console.printInfo("Number of reduce tasks determined at compile time: "
            + rWork.getNumReduceTasks());
      } else if (job.getNumReduceTasks() > 0) {
        int reducers = job.getNumReduceTasks();
        rWork.setNumReduceTasks(reducers);
        console
            .printInfo("Number of reduce tasks not specified. Defaulting to jobconf value of: "
            + reducers);
      } else {
        if (inputSummary == null) {
          inputSummary =  Utilities.getInputSummary(driverContext.getCtx(), work.getMapWork(), null);
        }
        int reducers = Utilities.estimateNumberOfReducers(conf, inputSummary, work.getMapWork(),
                                                          work.isFinalMapRed());
        rWork.setNumReduceTasks(reducers);
        console
            .printInfo("Number of reduce tasks not specified. Estimated from input data size: "
            + reducers);

      }