Examples of BaseWork


Examples of org.apache.hadoop.hive.ql.plan.BaseWork

  public static ReduceWork getReduceWork(Configuration conf) {
    return (ReduceWork) getBaseWork(conf, REDUCE_PLAN_NAME);
  }

  public static BaseWork getBaseWork(Configuration conf, String name) {
    BaseWork gWork = null;
    Path path = null;
    try {
      path = getPlanPath(conf, name);
      assert path != null;
      gWork = gWorkMap.get(path);
      if (gWork == null) {
        Path localPath;
        if (ShimLoader.getHadoopShims().isLocalMode(conf)) {
          // local mode: read the plan file straight from the plan path
          localPath = path;
        } else {
          // cluster mode: the plan file was localized for the task under this name
          localPath = new Path(name);
        }
        InputStream in = new FileInputStream(localPath.toUri().getPath());
        BaseWork ret = deserializePlan(in);
        gWork = ret;
        // cache the deserialized plan so later lookups skip deserialization
        gWorkMap.put(path, gWork);
      }
      }
      return gWork;
    } catch (FileNotFoundException fnf) {
View Full Code Here
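
The getReduceWork helper above is a thin wrapper over this cache-and-deserialize routine: a plan is deserialized once per path and then served from gWorkMap. A minimal caller sketch, assuming Hive's Utilities class and that the plan paths were already published to the configuration when the task launched (the class and helper method here are hypothetical):

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hive.ql.exec.Utilities;
  import org.apache.hadoop.hive.ql.plan.MapWork;
  import org.apache.hadoop.hive.ql.plan.ReduceWork;

  public class PlanLookup {
    // Hypothetical task-side helper; assumes the serialized plan paths
    // were registered in conf before the task started.
    static void loadPlans(Configuration conf) {
      MapWork mapWork = Utilities.getMapWork(conf);          // cached after the first call
      ReduceWork reduceWork = Utilities.getReduceWork(conf); // may be null for map-only jobs
    }
  }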

Examples of org.apache.hadoop.hive.ql.plan.BaseWork

   * @param name
   * @return BaseWork based on the name supplied; returns null if name is null
   * @throws RuntimeException if the configuration files are not proper or the plan cannot be loaded
   */
  private static BaseWork getBaseWork(Configuration conf, String name) {
    BaseWork gWork = null;
    Path path = null;
    InputStream in = null;
    try {
      path = getPlanPath(conf, name);
      assert path != null;
View Full Code Here
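
This variant declares the InputStream outside the try block, presumably so the truncated remainder of the method can close it in a finally clause; the lookup and caching logic is otherwise the same as in the version above.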

Examples of org.apache.hadoop.hive.ql.plan.BaseWork

    if (rWrk == null) {
      return;
    }
    Operator<? extends OperatorDesc> reducer = rWrk.getReducer();
    if (reducer instanceof JoinOperator) {
      BaseWork prntWork = mrWrk.getMapWork();
      checkForCrossProduct(taskName, reducer,
          new ExtractReduceSinkInfo(null).analyze(prntWork));
    }
  }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.BaseWork

    }
  }

  private Map<Integer, ExtractReduceSinkInfo.Info> getReducerInfo(TezWork tzWrk, String vertex, String prntVertex)
      throws SemanticException {
    BaseWork prntWork = tzWrk.getWorkMap().get(prntVertex);
    return new ExtractReduceSinkInfo(vertex).analyze(prntWork);
  }
View Full Code Here
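
Both cross-product checks resolve a parent BaseWork by vertex name. A hedged sketch of the same lookup pattern, assuming a populated TezWork (the variable and vertex name are illustrative; getWorkMap and getAllWork are the accessors used in these snippets):

  // Hypothetical: list every vertex in the TezWork DAG by name.
  for (BaseWork w : tezWork.getAllWork()) {
    System.out.println("vertex: " + w.getName());
  }
  // Or resolve a single vertex by name, as getReducerInfo does above.
  BaseWork parent = tezWork.getWorkMap().get("Map 1"); // vertex name is illustrative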

Examples of org.apache.hadoop.hive.ql.plan.BaseWork

     *  Otherwise, we need to link the mapjoin op
     *  to the RS work (associated with the RS-MJ pattern).
     *
     */
    mapJoinWork = context.mapJoinWorkMap.get(mapJoinOp);
    BaseWork parentWork;
    if (context.unionWorkMap.containsKey(parentRS)) {
      parentWork = context.unionWorkMap.get(parentRS);
    } else {
      assert context.childToWorkMap.get(parentRS).size() == 1;
      parentWork = context.childToWorkMap.get(parentRS).get(0);
    }

    // set the link between mapjoin and parent vertex
    int pos = context.mapJoinParentMap.get(mapJoinOp).indexOf(parentRS);
    if (pos == -1) {
      throw new SemanticException("Cannot find position of parent in mapjoin");
    }
    LOG.debug("Mapjoin "+mapJoinOp+", pos: "+pos+" --> "+parentWork.getName());
    mapJoinOp.getConf().getParentToInput().put(pos, parentWork.getName());

    int numBuckets = -1;
    EdgeType edgeType = EdgeType.BROADCAST_EDGE;
    if (mapJoinOp.getConf().isBucketMapJoin()) {
      numBuckets = (Integer) mapJoinOp.getConf().getBigTableBucketNumMapping().values().toArray()[0];
      if (mapJoinOp.getConf().getCustomBucketMapJoin()) {
        edgeType = EdgeType.CUSTOM_EDGE;
      } else {
        edgeType = EdgeType.CUSTOM_SIMPLE_EDGE;
      }
    }
    TezEdgeProperty edgeProp = new TezEdgeProperty(null, edgeType, numBuckets);

    if (mapJoinWork != null) {
      for (BaseWork myWork: mapJoinWork) {
        // link the work with the work associated with the reduce sink that triggered this rule
        TezWork tezWork = context.currentTask.getWork();
        LOG.debug("connecting "+parentWork.getName()+" with "+myWork.getName());
        tezWork.connect(parentWork, myWork, edgeProp);
       
        ReduceSinkOperator r = null;
        if (parentRS.getConf().getOutputName() != null) {
          LOG.debug("Cloning reduce sink for multi-child broadcast edge");
View Full Code Here
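
The edge-type selection above is the key decision: broadcast edges are the default for map joins, and bucket map joins upgrade to custom edges. A condensed sketch of that branch, under the same assumptions as the snippet (mapJoinOp and its descriptor as above):

  // Hypothetical recap of the selection logic in the snippet.
  EdgeType edgeType = EdgeType.BROADCAST_EDGE;
  int numBuckets = -1;
  if (mapJoinOp.getConf().isBucketMapJoin()) {
    numBuckets = (Integer) mapJoinOp.getConf()
        .getBigTableBucketNumMapping().values().toArray()[0];
    edgeType = mapJoinOp.getConf().getCustomBucketMapJoin()
        ? EdgeType.CUSTOM_EDGE : EdgeType.CUSTOM_SIMPLE_EDGE;
  }
  TezEdgeProperty edgeProp = new TezEdgeProperty(null, edgeType, numBuckets);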

Examples of org.apache.hadoop.hive.ql.plan.BaseWork

    Boolean skip = dispatchAndReturn(nd, opStack);

    // save some positional state
    Operator<? extends OperatorDesc> currentRoot = ctx.currentRootOperator;
    Operator<? extends OperatorDesc> parentOfRoot = ctx.parentOfRoot;
    BaseWork preceedingWork = ctx.preceedingWork;

    if (skip == null || !skip) {
      // move all the children to the front of queue
      for (Node ch : children) {
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.BaseWork

    // Right now the work graph is pretty simple. If there is no
    // preceding work we have a root and will generate a map
    // vertex. If there is preceding work we will generate
    // a reduce vertex.
    BaseWork work;
    if (context.rootToWorkMap.containsKey(root)) {
      // having seen the root operator before means there was a branch in the
      // operator graph. There are typically two reasons for that: a) mux/demux
      // b) multi insert. Mux/Demux will hit the same leaf again, multi insert
      // will result in a vertex with multiple FS or RS operators.

      // At this point we don't have to do anything special in this case. Just
      // run through the regular paces w/o creating a new task.
      work = context.rootToWorkMap.get(root);
    } else {
      // create a new vertex
      if (context.preceedingWork == null) {
        work = utils.createMapWork(context, root, tezWork, null);
      } else {
        work = utils.createReduceWork(context, root, tezWork);
      }
      context.rootToWorkMap.put(root, work);
    }

    if (!context.childToWorkMap.containsKey(operator)) {
      List<BaseWork> workItems = new LinkedList<BaseWork>();
      workItems.add(work);
      context.childToWorkMap.put(operator, workItems);
    } else {
      context.childToWorkMap.get(operator).add(work);
    }

    // remember which mapjoin operator links with which work
    if (!context.currentMapJoinOperators.isEmpty()) {
      for (MapJoinOperator mj: context.currentMapJoinOperators) {
        LOG.debug("Processing map join: " + mj);
        // remember the mapping in case we scan another branch of the
        // mapjoin later
        if (!context.mapJoinWorkMap.containsKey(mj)) {
          List<BaseWork> workItems = new LinkedList<BaseWork>();
          workItems.add(work);
          context.mapJoinWorkMap.put(mj, workItems);
        } else {
          context.mapJoinWorkMap.get(mj).add(work);
        }

        /*
         * this happens in case of map join operations.
         * The tree looks like this:
         *
         *        RS <--- we are here perhaps
         *        |
         *     MapJoin
         *     /     \
         *   RS       TS
         *  /
         * TS
         *
         * If we are at the RS pointed to above and have already visited the
         * RS following the left TS, work has already been generated for that
         * TS-RS branch. We need to hook the current work to this generated work.
         */
        if (context.linkOpWithWorkMap.containsKey(mj)) {
          Map<BaseWork,TezEdgeProperty> linkWorkMap = context.linkOpWithWorkMap.get(mj);
          if (linkWorkMap != null) {
            if (context.linkChildOpWithDummyOp.containsKey(mj)) {
              for (Operator<?> dummy: context.linkChildOpWithDummyOp.get(mj)) {
                work.addDummyOp((HashTableDummyOperator) dummy);
              }
            }
            for (Entry<BaseWork,TezEdgeProperty> parentWorkMap : linkWorkMap.entrySet()) {
              BaseWork parentWork = parentWorkMap.getKey();
              LOG.debug("connecting "+parentWork.getName()+" with "+work.getName());
              TezEdgeProperty edgeProp = parentWorkMap.getValue();
              tezWork.connect(parentWork, work, edgeProp);
             
              // need to set up output name for reduce sink now that we know the name
              // of the downstream work
              for (ReduceSinkOperator r:
                     context.linkWorkWithReduceSinkMap.get(parentWork)) {
                if (r.getConf().getOutputName() != null) {
                  LOG.debug("Cloning reduce sink for multi-child broadcast edge");
                  // we've already set this one up. Need to clone for the next work.
                  r = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(
                      (ReduceSinkDesc)r.getConf().clone(), r.getParentOperators());
                  context.clonedReduceSinks.add(r);
                }
                r.getConf().setOutputName(work.getName());
                context.connectedReduceSinks.add(r);
              }
            }
          }
        }
      }
      // clear out the set. we don't need it anymore.
      context.currentMapJoinOperators.clear();
    }

    // This is where we cut the tree as described above. We also remember that
    // we might have to connect parent work with this work later.
    for (Operator<?> parent: new ArrayList<Operator<?>>(root.getParentOperators())) {
      context.leafOperatorToFollowingWork.put(parent, work);
      LOG.debug("Removing " + parent + " as parent from " + root);
      root.removeParent(parent);
    }

    if (!context.currentUnionOperators.isEmpty()) {     
      // if there are union all operators we need to add the work to the set
      // of union operators.

      UnionWork unionWork;
      if (context.unionWorkMap.containsKey(operator)) {
        // we've seen this terminal before and have created a union work object.
        // just need to add this work to it. There will be no children of this one
        // since we've passed this operator before.
        assert operator.getChildOperators().isEmpty();
        unionWork = (UnionWork) context.unionWorkMap.get(operator);

      } else {
        // first time through. we need to create a union work object and add this
        // work to it. Subsequent work should reference the union and not the actual
        // work.
        unionWork = utils.createUnionWork(context, operator, tezWork);
      }

      // finally hook everything up
      LOG.debug("Connecting union work ("+unionWork+") with work ("+work+")");
      TezEdgeProperty edgeProp = new TezEdgeProperty(EdgeType.CONTAINS);
      tezWork.connect(unionWork, work, edgeProp);
      unionWork.addUnionOperators(context.currentUnionOperators);
      context.currentUnionOperators.clear();
      context.workWithUnionOperators.add(work);
      work = unionWork;
    }

    // We're scanning a tree from roots to leaf (this is not technically
    // correct, demux and mux operators might form a diamond shape, but
    // we will only scan one path and ignore the others, because the
    // diamond shape is always contained in a single vertex). The scan
    // is depth first and because we remove parents when we pack a pipeline
    // into a vertex we will never visit any node twice. But because of that
    // we might have a situation where we need to connect 'work' that comes after
    // the 'work' we're currently looking at.
    //
    // Also note: the concept of leaf and root is reversed in hive for historical
    // reasons. Roots are data sources, leaves are data sinks. I know.
    if (context.leafOperatorToFollowingWork.containsKey(operator)) {

      BaseWork followingWork = context.leafOperatorToFollowingWork.get(operator);

      LOG.debug("Second pass. Leaf operator: "+operator
        +" has common downstream work:"+followingWork);

      // need to add this branch to the key + value info
View Full Code Here
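
To summarize the union handling at the end of this snippet: a union vertex is linked to each member vertex with a CONTAINS edge rather than a data-movement edge, and subsequent work references the union instead of the members. A minimal hedged sketch (unionWork and memberWork are illustrative names):

  // Hypothetical: attach a member vertex to a union vertex,
  // mirroring the CONTAINS edge created in the snippet above.
  TezEdgeProperty contains = new TezEdgeProperty(EdgeType.CONTAINS);
  tezWork.connect(unionWork, memberWork, contains);
  unionWork.addUnionOperators(context.currentUnionOperators);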

Examples of org.apache.hadoop.hive.ql.plan.BaseWork

    assertTrue(ctx.rootTasks.contains(ctx.currentTask));

    TezWork work = ctx.currentTask.getWork();
    assertEquals(1, work.getAllWork().size());

    BaseWork w = work.getAllWork().get(0);
    assertTrue(w instanceof MapWork);

    MapWork mw = (MapWork)w;

    // need to make sure names are set for tez to connect things right
    assertNotNull(w.getName());

    // map work should start with our ts op
    assertSame(mw.getAliasToWork().entrySet().iterator().next().getValue(), ts);

    // preceding work must be set to the newly generated map
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.BaseWork

    proc.process(fs, null, ctx, (Object[])null);

    TezWork work = ctx.currentTask.getWork();
    assertEquals(2, work.getAllWork().size());

    BaseWork w = work.getAllWork().get(1);
    assertTrue(w instanceof ReduceWork);
    assertTrue(work.getParents(w).contains(work.getAllWork().get(0)));

    ReduceWork rw = (ReduceWork)w;

    // need to make sure names are set for tez to connect things right
    assertNotNull(w.getName());

    // reduce work should have our fs op as the reducer
    assertSame(rw.getReducer(), fs);

    // should have severed the ties
View Full Code Here