Examples of org.apache.hadoop.hive.ql.plan.BaseWork

org.apache.hadoop.hive.ql.plan.BaseWork
BaseWork. Base class for any "work" that's being done on the cluster. Items like stats gathering that are commonly used regarless of the type of work live here.


    // Right now the work graph is pretty simple. If there is no
    // Preceding work we have a root and will generate a map
    // vertex. If there is a preceding work we will generate
    // a reduce vertex
    BaseWork work;
    if (context.rootToWorkMap.containsKey(root)) {
      // having seen the root operator before means there was a branch in the
      // operator graph. There's typically two reasons for that: a) mux/demux
      // b) multi insert. Mux/Demux will hit the same leaf again, multi insert
      // will result into a vertex with multiple FS or RS operators.


      // At this point we don't have to do anything special in this case. Just
      // run through the regular paces w/o creating a new task.
      work = context.rootToWorkMap.get(root);
    } else {
      // create a new vertex
      if (context.preceedingWork == null) {
        work = utils.createMapWork(context, root, tezWork, null);
      } else {
        work = utils.createReduceWork(context, root, tezWork);
      }
      context.rootToWorkMap.put(root, work);
    }


    // this is where we set the sort columns that we will be using for KeyValueInputMerge
    if (operator instanceof DummyStoreOperator) {
      work.addSortCols(root.getOpTraits().getSortCols().get(0));
    }


    if (!context.childToWorkMap.containsKey(operator)) {
      List<BaseWork> workItems = new LinkedList<BaseWork>();
      workItems.add(work);
      context.childToWorkMap.put(operator, workItems);
    } else {
      context.childToWorkMap.get(operator).add(work);
    }


    // this transformation needs to be first because it changes the work item itself.
    // which can affect the working of all downstream transformations.
    if (context.currentMergeJoinOperator != null) {
      // we are currently walking the big table side of the merge join. we need to create or hook up
      // merge join work.
      MergeJoinWork mergeJoinWork = null;
      if (context.opMergeJoinWorkMap.containsKey(context.currentMergeJoinOperator)) {
        // we have found a merge work corresponding to this closing operator. Hook up this work.
        mergeJoinWork = context.opMergeJoinWorkMap.get(context.currentMergeJoinOperator);
      } else {
        // we need to create the merge join work
        mergeJoinWork = new MergeJoinWork();
        mergeJoinWork.setMergeJoinOperator(context.currentMergeJoinOperator);
        tezWork.add(mergeJoinWork);
        context.opMergeJoinWorkMap.put(context.currentMergeJoinOperator, mergeJoinWork);
      }
      // connect the work correctly.
      work.addSortCols(root.getOpTraits().getSortCols().get(0));
      mergeJoinWork.addMergedWork(work, null);
      Operator<? extends OperatorDesc> parentOp =
          getParentFromStack(context.currentMergeJoinOperator, stack);
      int pos = context.currentMergeJoinOperator.getTagForOperator(parentOp);
      work.setTag(pos);
      tezWork.setVertexType(work, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES);
      for (BaseWork parentWork : tezWork.getParents(work)) {
        TezEdgeProperty edgeProp = tezWork.getEdgeProperty(parentWork, work);
        tezWork.disconnect(parentWork, work);
        tezWork.connect(parentWork, mergeJoinWork, edgeProp);
      }


      for (BaseWork childWork : tezWork.getChildren(work)) {
        TezEdgeProperty edgeProp = tezWork.getEdgeProperty(work, childWork);
        tezWork.disconnect(work, childWork);
        tezWork.connect(mergeJoinWork, childWork, edgeProp);
      }
      tezWork.remove(work);
      context.rootToWorkMap.put(root, mergeJoinWork);
      context.childToWorkMap.get(operator).remove(work);
      context.childToWorkMap.get(operator).add(mergeJoinWork);
      work = mergeJoinWork;
      context.currentMergeJoinOperator = null;
    }


    // remember which mapjoin operator links with which work
    if (!context.currentMapJoinOperators.isEmpty()) {
      for (MapJoinOperator mj: context.currentMapJoinOperators) {
        LOG.debug("Processing map join: " + mj);
        // remember the mapping in case we scan another branch of the
        // mapjoin later
        if (!context.mapJoinWorkMap.containsKey(mj)) {
          List<BaseWork> workItems = new LinkedList<BaseWork>();
          workItems.add(work);
          context.mapJoinWorkMap.put(mj, workItems);
        } else {
          context.mapJoinWorkMap.get(mj).add(work);
        }


        /*
         * this happens in case of map join operations.
         * The tree looks like this:
         *
         *        RS <--- we are here perhaps
         *        |
         *     MapJoin
         *     /     \
         *   RS       TS
         *  /
         * TS
         *
         * If we are at the RS pointed above, and we may have already visited the
         * RS following the TS, we have already generated work for the TS-RS.
         * We need to hook the current work to this generated work.
         */
        if (context.linkOpWithWorkMap.containsKey(mj)) {
          Map<BaseWork,TezEdgeProperty> linkWorkMap = context.linkOpWithWorkMap.get(mj);
          if (linkWorkMap != null) {
            if (context.linkChildOpWithDummyOp.containsKey(mj)) {
              for (Operator<?> dummy: context.linkChildOpWithDummyOp.get(mj)) {
                work.addDummyOp((HashTableDummyOperator) dummy);
              }
            }
            for (Entry<BaseWork,TezEdgeProperty> parentWorkMap : linkWorkMap.entrySet()) {
              BaseWork parentWork = parentWorkMap.getKey();
              LOG.debug("connecting "+parentWork.getName()+" with "+work.getName());
              TezEdgeProperty edgeProp = parentWorkMap.getValue();
              tezWork.connect(parentWork, work, edgeProp);
              if (edgeProp.getEdgeType() == EdgeType.CUSTOM_EDGE) {
                tezWork.setVertexType(work, VertexType.INITIALIZED_EDGES);
              }


              // need to set up output name for reduce sink now that we know the name
              // of the downstream work
              for (ReduceSinkOperator r:
                     context.linkWorkWithReduceSinkMap.get(parentWork)) {
                if (r.getConf().getOutputName() != null) {
                  LOG.debug("Cloning reduce sink for multi-child broadcast edge");
                  // we've already set this one up. Need to clone for the next work.
                  r = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(
                      (ReduceSinkDesc)r.getConf().clone(), r.getParentOperators());
                  context.clonedReduceSinks.add(r);
                }
                r.getConf().setOutputName(work.getName());
                context.connectedReduceSinks.add(r);
              }
            }
          }
        }
      }
      // clear out the set. we don't need it anymore.
      context.currentMapJoinOperators.clear();
    }


    if (!context.currentUnionOperators.isEmpty()) {
      // if there are union all operators we need to add the work to the set
      // of union operators.


      UnionWork unionWork;
      if (context.unionWorkMap.containsKey(operator)) {
        // we've seen this terminal before and have created a union work object.
        // just need to add this work to it. There will be no children of this one
        // since we've passed this operator before.
        assert operator.getChildOperators().isEmpty();
        unionWork = (UnionWork) context.unionWorkMap.get(operator);


      } else {
        // first time through. we need to create a union work object and add this
        // work to it. Subsequent work should reference the union and not the actual
        // work.
        unionWork = utils.createUnionWork(context, operator, tezWork);
      }


      // finally hook everything up
      LOG.debug("Connecting union work ("+unionWork+") with work ("+work+")");
      TezEdgeProperty edgeProp = new TezEdgeProperty(EdgeType.CONTAINS);
      tezWork.connect(unionWork, work, edgeProp);
      unionWork.addUnionOperators(context.currentUnionOperators);
      context.currentUnionOperators.clear();
      context.workWithUnionOperators.add(work);
      work = unionWork;
    }




    // This is where we cut the tree as described above. We also remember that
    // we might have to connect parent work with this work later.
    boolean removeParents = false;
    for (Operator<?> parent: new ArrayList<Operator<?>>(root.getParentOperators())) {
      removeParents = true;
      context.leafOperatorToFollowingWork.put(parent, work);
      LOG.debug("Removing " + parent + " as parent from " + root);
    }
    if (removeParents) {
      for (Operator<?> parent : new ArrayList<Operator<?>>(root.getParentOperators())) {
        root.removeParent(parent);
      }
    }


    // We're scanning a tree from roots to leaf (this is not technically
    // correct, demux and mux operators might form a diamond shape, but
    // we will only scan one path and ignore the others, because the
    // diamond shape is always contained in a single vertex). The scan
    // is depth first and because we remove parents when we pack a pipeline
    // into a vertex we will never visit any node twice. But because of that
    // we might have a situation where we need to connect 'work' that comes after
    // the 'work' we're currently looking at.
    //
    // Also note: the concept of leaf and root is reversed in hive for historical
    // reasons. Roots are data sources, leaves are data sinks. I know.
    if (context.leafOperatorToFollowingWork.containsKey(operator)) {


      BaseWork followingWork = context.leafOperatorToFollowingWork.get(operator);
      long bytesPerReducer = context.conf.getLongVar(HiveConf.ConfVars.BYTESPERREDUCER);


      LOG.debug("Second pass. Leaf operator: "+operator
        +" has common downstream work:"+followingWork);


      if (operator instanceof DummyStoreOperator) {
        // this is the small table side.
        assert (followingWork instanceof MergeJoinWork);
        MergeJoinWork mergeJoinWork = (MergeJoinWork) followingWork;
        CommonMergeJoinOperator mergeJoinOp = mergeJoinWork.getMergeJoinOperator();
        work.setTag(mergeJoinOp.getTagForOperator(operator));
        mergeJoinWork.addMergedWork(null, work);
        tezWork.setVertexType(mergeJoinWork, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES);
        for (BaseWork parentWork : tezWork.getParents(work)) {
          TezEdgeProperty edgeProp = tezWork.getEdgeProperty(parentWork, work);
          tezWork.disconnect(parentWork, work);
          tezWork.connect(parentWork, mergeJoinWork, edgeProp);
        }
        work = mergeJoinWork;
      } else {
        // need to add this branch to the key + value info
        assert operator instanceof ReduceSinkOperator
            && ((followingWork instanceof ReduceWork) || (followingWork instanceof MergeJoinWork)
                || followingWork instanceof UnionWork);
        ReduceSinkOperator rs = (ReduceSinkOperator) operator;
        ReduceWork rWork = null;
        if (followingWork instanceof MergeJoinWork) {
          MergeJoinWork mergeJoinWork = (MergeJoinWork) followingWork;
          rWork = (ReduceWork) mergeJoinWork.getMainWork();
        } else if (followingWork instanceof UnionWork) {
          // this can only be possible if there is merge work followed by the union
          UnionWork unionWork = (UnionWork) followingWork;
          int index = getMergeIndex(tezWork, unionWork, rs);
          // guaranteed to be instance of MergeJoinWork if index is valid
          BaseWork baseWork = tezWork.getChildren(unionWork).get(index);
          if (baseWork instanceof MergeJoinWork) {
            MergeJoinWork mergeJoinWork = (MergeJoinWork) baseWork;
            // disconnect the connection to union work and connect to merge work
            followingWork = mergeJoinWork;
            rWork = (ReduceWork) mergeJoinWork.getMainWork();
          } else {
            throw new SemanticException("Unknown work type found: "
                + baseWork.getClass().getCanonicalName());
          }
        } else {
          rWork = (ReduceWork) followingWork;
        }
        GenMapRedUtils.setKeyAndValueDesc(rWork, rs);

View Full Code Here

   * @param name
   * @return BaseWork based on the name supplied will return null if name is null
   * @throws RuntimeException if the configuration files are not proper or if plan can not be loaded
   */
  private static BaseWork getBaseWork(Configuration conf, String name) {
    BaseWork gWork = null;
    Path path = null;
    InputStream in = null;
    try {
      path = getPlanPath(conf, name);
      assert path != null;

View Full Code Here

    MapWork work = (MapWork) procCtx.rootToWorkMap.get(ts);
    if (work == null) {
      throw new AssertionError("No work found for tablescan " + ts);
    }


    BaseWork enclosingWork = getEnclosingWork(event, procCtx);
    if (enclosingWork == null) {
      throw new AssertionError("Cannot find work for operator" + event);
    }
    String sourceName = enclosingWork.getName();


    // store the vertex name in the operator pipeline
    eventDesc.setVertexName(work.getName());
    eventDesc.setInputName(work.getAliases().get(0));

View Full Code Here

   */
  public BaseWork getEnclosingWork(Operator<?> op, GenTezProcContext procCtx) {
    List<Operator<?>> ops = new ArrayList<Operator<?>>();
    findRoots(op, ops);
    for (Operator<?> r : ops) {
      BaseWork work = procCtx.rootToWorkMap.get(r);
      if (work != null) {
        return work;
      }
    }
    return null;

View Full Code Here

     *  Otherwise, we need to associate that the mapjoin op
     *  to be linked to the RS work (associated with the RS-MJ pattern).
     *
     */
    mapJoinWork = context.mapJoinWorkMap.get(mapJoinOp);
    BaseWork parentWork;
    if (context.unionWorkMap.containsKey(parentRS)) {
      parentWork = context.unionWorkMap.get(parentRS);
    } else {
      assert context.childToWorkMap.get(parentRS).size() == 1;
      parentWork = context.childToWorkMap.get(parentRS).get(0);
    }


    // set the link between mapjoin and parent vertex
    int pos = context.mapJoinParentMap.get(mapJoinOp).indexOf(parentRS);
    if (pos == -1) {
      throw new SemanticException("Cannot find position of parent in mapjoin");
    }
    MapJoinDesc joinConf = mapJoinOp.getConf();
    long keyCount = Long.MAX_VALUE, rowCount = Long.MAX_VALUE, bucketCount = 1;
    Statistics stats = parentRS.getStatistics();
    if (stats != null) {
      keyCount = rowCount = stats.getNumRows();
      if (keyCount <= 0) {
        keyCount = rowCount = Long.MAX_VALUE;
      }
      ArrayList<String> keyCols = parentRS.getConf().getOutputKeyColumnNames();
      if (keyCols != null && !keyCols.isEmpty()) {
        // See if we can arrive at a smaller number using distinct stats from key columns.
        long maxKeyCount = 1;
        String prefix = Utilities.ReduceField.KEY.toString();
        for (String keyCol : keyCols) {
          ExprNodeDesc realCol = parentRS.getColumnExprMap().get(prefix + "." + keyCol);
          ColStatistics cs =
              StatsUtils.getColStatisticsFromExpression(context.conf, stats, realCol);
          if (cs == null || cs.getCountDistint() <= 0) {
            maxKeyCount = Long.MAX_VALUE;
            break;
          }
          maxKeyCount *= cs.getCountDistint();
          if (maxKeyCount >= keyCount) {
            break;
          }
        }
        keyCount = Math.min(maxKeyCount, keyCount);
      }
      if (joinConf.isBucketMapJoin()) {
        OpTraits opTraits = mapJoinOp.getOpTraits();
        bucketCount = (opTraits == null) ? -1 : opTraits.getNumBuckets();
        if (bucketCount > 0) {
          // We cannot obtain a better estimate without CustomPartitionVertex providing it
          // to us somehow; in which case using statistics would be completely unnecessary.
          keyCount /= bucketCount;
        }
      }
    }
    LOG.info("Mapjoin " + mapJoinOp + ", pos: " + pos + " --> " + parentWork.getName() + " ("
      + keyCount + " keys estimated from " + rowCount + " rows, " + bucketCount + " buckets)");
    joinConf.getParentToInput().put(pos, parentWork.getName());
    if (keyCount != Long.MAX_VALUE) {
      joinConf.getParentKeyCounts().put(pos, keyCount);
    }


    int numBuckets = -1;
    EdgeType edgeType = EdgeType.BROADCAST_EDGE;
    if (joinConf.isBucketMapJoin()) {


      // disable auto parallelism for bucket map joins
      parentRS.getConf().setReducerTraits(EnumSet.of(FIXED));


      numBuckets = (Integer) joinConf.getBigTableBucketNumMapping().values().toArray()[0];
      if (joinConf.getCustomBucketMapJoin()) {
        edgeType = EdgeType.CUSTOM_EDGE;
      } else {
        edgeType = EdgeType.CUSTOM_SIMPLE_EDGE;
      }
    }
    TezEdgeProperty edgeProp = new TezEdgeProperty(null, edgeType, numBuckets);


    if (mapJoinWork != null) {
      for (BaseWork myWork: mapJoinWork) {
        // link the work with the work associated with the reduce sink that triggered this rule
        TezWork tezWork = context.currentTask.getWork();
        LOG.debug("connecting "+parentWork.getName()+" with "+myWork.getName());
        tezWork.connect(parentWork, myWork, edgeProp);
        if (edgeType == EdgeType.CUSTOM_EDGE) {
          tezWork.setVertexType(myWork, VertexType.INITIALIZED_EDGES);
        }

View Full Code Here

    TezWork tezWork = context.currentTask.getWork();
    @SuppressWarnings("unchecked")
    Operator<? extends OperatorDesc> parentOp =
        (Operator<? extends OperatorDesc>) ((stack.get(stack.size() - 2)));
    // Guaranteed to be just 1 because each DummyStoreOperator can be part of only one work.
    BaseWork parentWork = context.childToWorkMap.get(parentOp).get(0);




    // we need to set the merge work that has been created as part of the dummy store walk. If a
    // merge work already exists for this merge join operator, add the dummy store work to the
    // merge work. Else create a merge work, add above work to the merge work
    MergeJoinWork mergeWork = null;
    if (context.opMergeJoinWorkMap.containsKey(mergeJoinOp)) {
      // we already have the merge work corresponding to this merge join operator
      mergeWork = context.opMergeJoinWorkMap.get(mergeJoinOp);
    } else {
      mergeWork = new MergeJoinWork();
      tezWork.add(mergeWork);
      context.opMergeJoinWorkMap.put(mergeJoinOp, mergeWork);
    }


    mergeWork.setMergeJoinOperator(mergeJoinOp);
    mergeWork.addMergedWork(null, parentWork);
    tezWork.setVertexType(mergeWork, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES);


    for (BaseWork grandParentWork : tezWork.getParents(parentWork)) {
      parentWork.setName(grandParentWork.getName());
      TezEdgeProperty edgeProp = tezWork.getEdgeProperty(grandParentWork, parentWork);
      tezWork.disconnect(grandParentWork, parentWork);
      tezWork.connect(grandParentWork, mergeWork, edgeProp);
    }


    for (BaseWork childWork : tezWork.getChildren(parentWork)) {
      TezEdgeProperty edgeProp = tezWork.getEdgeProperty(parentWork, childWork);
      tezWork.disconnect(parentWork, childWork);
      tezWork.connect(mergeWork, childWork, edgeProp);
    }


    tezWork.remove(parentWork);


    DummyStoreOperator dummyOp = (DummyStoreOperator) (stack.get(stack.size() - 2));


    parentWork.setTag(mergeJoinOp.getTagForOperator(dummyOp));


    mergeJoinOp.getParentOperators().remove(dummyOp);
    dummyOp.getChildOperators().clear();


    return true;

View Full Code Here

    if ( rWrk == null) {
      return;
    }
    Operator<? extends OperatorDesc> reducer = rWrk.getReducer();
    if ( reducer instanceof JoinOperator|| reducer instanceof CommonMergeJoinOperator ) {
      BaseWork prntWork = mrWrk.getMapWork();
      checkForCrossProduct(taskName, reducer,
          new ExtractReduceSinkInfo(null).analyze(prntWork));
    }
  }

View Full Code Here

    }
  }


  private Map<Integer, ExtractReduceSinkInfo.Info> getReducerInfo(TezWork tzWrk, String vertex, String prntVertex)
      throws SemanticException {
    BaseWork prntWork = tzWrk.getWorkMap().get(prntVertex);
    return new ExtractReduceSinkInfo(vertex).analyze(prntWork);
  }

View Full Code Here


    // Right now the work graph is pretty simple. If there is no
    // Preceding work we have a root and will generate a map
    // vertex. If there is a preceding work we will generate
    // a reduce vertex
    BaseWork work;
    if (context.rootToWorkMap.containsKey(root)) {
      // having seen the root operator before means there was a branch in the
      // operator graph. There's typically two reasons for that: a) mux/demux
      // b) multi insert. Mux/Demux will hit the same leaf again, multi insert
      // will result into a vertex with multiple FS or RS operators.


      // At this point we don't have to do anything special in this case. Just
      // run through the regular paces w/o creating a new task.
      work = context.rootToWorkMap.get(root);
    } else {
      // create a new vertex
      if (context.preceedingWork == null) {
        work = utils.createMapWork(context, root, tezWork, null);
      } else {
        work = utils.createReduceWork(context, root, tezWork);
      }
      context.rootToWorkMap.put(root, work);
    }


    if (!context.childToWorkMap.containsKey(operator)) {
      List<BaseWork> workItems = new LinkedList<BaseWork>();
      workItems.add(work);
      context.childToWorkMap.put(operator, workItems);
    } else {
      context.childToWorkMap.get(operator).add(work);
    }


    // remember which mapjoin operator links with which work
    if (!context.currentMapJoinOperators.isEmpty()) {
      for (MapJoinOperator mj: context.currentMapJoinOperators) {
        LOG.debug("Processing map join: " + mj);
        // remember the mapping in case we scan another branch of the 
        // mapjoin later
        if (!context.mapJoinWorkMap.containsKey(mj)) {
          List<BaseWork> workItems = new LinkedList<BaseWork>();
          workItems.add(work);
          context.mapJoinWorkMap.put(mj, workItems);
        } else {
          context.mapJoinWorkMap.get(mj).add(work);
        }


        /*
         * this happens in case of map join operations.
         * The tree looks like this:
         *
         *        RS <--- we are here perhaps
         *        |
         *     MapJoin
         *     /     \
         *   RS       TS
         *  /
         * TS
         *
         * If we are at the RS pointed above, and we may have already visited the
         * RS following the TS, we have already generated work for the TS-RS.
         * We need to hook the current work to this generated work.
         */
        if (context.linkOpWithWorkMap.containsKey(mj)) {
          Map<BaseWork,TezEdgeProperty> linkWorkMap = context.linkOpWithWorkMap.get(mj);
          if (linkWorkMap != null) {
            if (context.linkChildOpWithDummyOp.containsKey(mj)) {
              for (Operator<?> dummy: context.linkChildOpWithDummyOp.get(mj)) {
                work.addDummyOp((HashTableDummyOperator) dummy);
              }
            }
            for (Entry<BaseWork,TezEdgeProperty> parentWorkMap : linkWorkMap.entrySet()) {
              BaseWork parentWork = parentWorkMap.getKey();
              LOG.debug("connecting "+parentWork.getName()+" with "+work.getName());
              TezEdgeProperty edgeProp = parentWorkMap.getValue();
              tezWork.connect(parentWork, work, edgeProp);
              
              // need to set up output name for reduce sink now that we know the name
              // of the downstream work
              for (ReduceSinkOperator r:
                     context.linkWorkWithReduceSinkMap.get(parentWork)) {
                if (r.getConf().getOutputName() != null) {
                  LOG.debug("Cloning reduce sink for multi-child broadcast edge");
                  // we've already set this one up. Need to clone for the next work.
                  r = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(
                      (ReduceSinkDesc)r.getConf().clone(), r.getParentOperators());
                  context.clonedReduceSinks.add(r);
                }
                r.getConf().setOutputName(work.getName());
                context.connectedReduceSinks.add(r);
              }
            }
          }
        }
      }
      // clear out the set. we don't need it anymore.
      context.currentMapJoinOperators.clear();
    }


    // This is where we cut the tree as described above. We also remember that
    // we might have to connect parent work with this work later.
    for (Operator<?> parent: new ArrayList<Operator<?>>(root.getParentOperators())) {
      context.leafOperatorToFollowingWork.put(parent, work);
      LOG.debug("Removing " + parent + " as parent from " + root);
      root.removeParent(parent);
    }


    if (!context.currentUnionOperators.isEmpty()) {      
      // if there are union all operators we need to add the work to the set
      // of union operators.


      UnionWork unionWork;
      if (context.unionWorkMap.containsKey(operator)) {
        // we've seen this terminal before and have created a union work object.
        // just need to add this work to it. There will be no children of this one
        // since we've passed this operator before.
        assert operator.getChildOperators().isEmpty();
        unionWork = (UnionWork) context.unionWorkMap.get(operator);


      } else {
        // first time through. we need to create a union work object and add this
        // work to it. Subsequent work should reference the union and not the actual
        // work.
        unionWork = utils.createUnionWork(context, operator, tezWork);
      }


      // finally hook everything up
      LOG.debug("Connecting union work ("+unionWork+") with work ("+work+")");
      TezEdgeProperty edgeProp = new TezEdgeProperty(EdgeType.CONTAINS);
      tezWork.connect(unionWork, work, edgeProp);
      unionWork.addUnionOperators(context.currentUnionOperators);
      context.currentUnionOperators.clear();
      context.workWithUnionOperators.add(work);
      work = unionWork;
    }


    // We're scanning a tree from roots to leaf (this is not technically
    // correct, demux and mux operators might form a diamond shape, but
    // we will only scan one path and ignore the others, because the
    // diamond shape is always contained in a single vertex). The scan
    // is depth first and because we remove parents when we pack a pipeline
    // into a vertex we will never visit any node twice. But because of that
    // we might have a situation where we need to connect 'work' that comes after
    // the 'work' we're currently looking at.
    //
    // Also note: the concept of leaf and root is reversed in hive for historical
    // reasons. Roots are data sources, leaves are data sinks. I know.
    if (context.leafOperatorToFollowingWork.containsKey(operator)) {


      BaseWork followingWork = context.leafOperatorToFollowingWork.get(operator);


      LOG.debug("Second pass. Leaf operator: "+operator
        +" has common downstream work:"+followingWork);


      // need to add this branch to the key + value info

View Full Code Here

     *  Otherwise, we need to associate that the mapjoin op
     *  to be linked to the RS work (associated with the RS-MJ pattern).
     *
     */
    mapJoinWork = context.mapJoinWorkMap.get(mapJoinOp);
    BaseWork parentWork;
    if (context.unionWorkMap.containsKey(parentRS)) {
      parentWork = context.unionWorkMap.get(parentRS);
    } else {
      assert context.childToWorkMap.get(parentRS).size() == 1;
      parentWork = context.childToWorkMap.get(parentRS).get(0);
    }


    // set the link between mapjoin and parent vertex
    int pos = context.mapJoinParentMap.get(mapJoinOp).indexOf(parentRS);
    if (pos == -1) {
      throw new SemanticException("Cannot find position of parent in mapjoin");
    }
    LOG.debug("Mapjoin "+mapJoinOp+", pos: "+pos+" --> "+parentWork.getName());
    mapJoinOp.getConf().getParentToInput().put(pos, parentWork.getName());


    int numBuckets = -1;
    EdgeType edgeType = EdgeType.BROADCAST_EDGE;
    if (mapJoinOp.getConf().isBucketMapJoin()) {
      numBuckets = (Integer) mapJoinOp.getConf().getBigTableBucketNumMapping().values().toArray()[0];
      if (mapJoinOp.getConf().getCustomBucketMapJoin()) {
        edgeType = EdgeType.CUSTOM_EDGE;
      } else {
        edgeType = EdgeType.CUSTOM_SIMPLE_EDGE;
      }
    }
    TezEdgeProperty edgeProp = new TezEdgeProperty(null, edgeType, numBuckets);


    if (mapJoinWork != null) {
      for (BaseWork myWork: mapJoinWork) {
        // link the work with the work associated with the reduce sink that triggered this rule
        TezWork tezWork = context.currentTask.getWork();
        LOG.debug("connecting "+parentWork.getName()+" with "+myWork.getName());
        tezWork.connect(parentWork, myWork, edgeProp);
        
        ReduceSinkOperator r = null;
        if (parentRS.getConf().getOutputName() != null) {
          LOG.debug("Cloning reduce sink for multi-child broadcast edge");

View Full Code Here

0 1 2

TOP

Related Classes of org.apache.hadoop.hive.ql.plan.BaseWork

org.apache.hadoop.hive.ql.exec.Utilities

org.apache.hadoop.hive.ql.optimizer.MergeJoinProc

org.apache.hadoop.hive.ql.optimizer.physical.CrossProductCheck

org.apache.hadoop.hive.ql.optimizer.ReduceSinkMapJoinProc

org.apache.hadoop.hive.ql.parse.GenTezUtils

org.apache.hadoop.hive.ql.parse.GenTezWork

org.apache.hadoop.hive.ql.parse.GenTezWorkWalker

org.apache.hadoop.hive.ql.parse.TestGenTezWork

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.