Package org.apache.hadoop.hive.ql.exec

Examples of org.apache.hadoop.hive.ql.exec.MapJoinOperator
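
The excerpts below all follow one pattern: a common JoinOperator is rewritten into a MapJoinOperator and the operator graph is rewired around it. The helper below is a minimal sketch of that pattern, not Hive API: the class and method names are hypothetical, and the five-argument convertMapJoin signature matches the excerpts on this page but varies across Hive versions.

import java.io.Serializable;
import java.util.LinkedHashMap;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.optimizer.MapJoinProcessor;
import org.apache.hadoop.hive.ql.parse.OpParseContext;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.QBJoinTree;
import org.apache.hadoop.hive.ql.parse.SemanticException;

// Hypothetical helper, not part of Hive: rewrite one join into a map join,
// keeping the table at position mapJoinPos as the streamed (big) table.
public final class MapJoinRewriteSketch {
  private MapJoinRewriteSketch() {
  }

  public static MapJoinOperator rewrite(ParseContext pctx, JoinOperator joinOp,
      QBJoinTree joinTree, int mapJoinPos) throws SemanticException {
    HiveConf hiveConf = pctx.getConf();
    // outer joins may only skip the conversion checks when the bucket
    // map join options are enabled (mirrors the excerpts below)
    boolean noCheckOuterJoin = HiveConf.getBoolVar(hiveConf,
        HiveConf.ConfVars.HIVEOPTSORTMERGEBUCKETMAPJOIN)
        && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTBUCKETMAPJOIN);

    LinkedHashMap<Operator<? extends Serializable>, OpParseContext> opParseCtxMap =
        pctx.getOpParseCtx();
    // convertMapJoin detaches joinOp from the graph and returns the new
    // MapJoinOperator wired to joinOp's former parents and children
    return MapJoinProcessor.convertMapJoin(opParseCtxMap, joinOp, joinTree,
        mapJoinPos, noCheckOuterJoin);
  }
}

In the optimizer this rewrite is driven per join operator from the join context map, as the fourth excerpt below shows.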


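Converting the join in a cloned plan: the conversion is driven off the plan's operator-parse-context map and join tree, and the local work generated for the small tables yields the big table alias: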
    try {
      LinkedHashMap<Operator<? extends Serializable>, OpParseContext> opParseCtxMap = newWork
          .getOpParseCtxMap();
      QBJoinTree newJoinTree = newWork.getJoinTree();
      // generate the map join operator; the map join has already been validated
      MapJoinOperator newMapJoinOp = MapJoinProcessor.convertMapJoin(opParseCtxMap, op,
          newJoinTree, mapJoinPos, true);
      // generate the local work and return the big table alias
      String bigTableAlias = MapJoinProcessor
          .genMapJoinLocalWork(newWork, newMapJoinOp, mapJoinPos);
      // clean up the mapred work


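Inside the conversion itself (the excerpt opens mid-way through the MapJoinDesc constructor call): the descriptor is completed, the MapJoinOperator is created through OperatorFactory, and the original join operator is unlinked from its children and parents: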
        valueTableDescs, valueFiltedTableDescs, outputColumnNames, mapJoinPos, joinCondns,
        filterMap, op.getConf().getNoOuterJoin(), dumpFilePrefix);
    mapJoinDescriptor.setTagOrder(tagOrder);
    mapJoinDescriptor.setNullSafes(desc.getNullSafes());

    MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(
        mapJoinDescriptor, new RowSchema(outputRS.getColumnInfos()), newPar);

    OpParseContext ctx = new OpParseContext(outputRS);
    opParseCtxMap.put(mapJoinOp, ctx);

    mapJoinOp.getConf().setReversedExprs(op.getConf().getReversedExprs());
    mapJoinOp.setColumnExprMap(colExprMap);

    // change the children of the original join operator to point to the map
    // join operator
    List<Operator<? extends Serializable>> childOps = op.getChildOperators();
    for (Operator<? extends Serializable> childOp : childOps) {
      childOp.replaceParent(op, mapJoinOp);
    }

    mapJoinOp.setChildOperators(childOps);
    mapJoinOp.setParentOperators(newParentOps);
    op.setChildOperators(null);
    op.setParentOperators(null);

    return mapJoinOp;
  }

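From MapJoinProcessor: compute whether outer joins may skip the conversion checks, convert the join, and add a dummy select over all columns behind the new operator: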
    HiveConf hiveConf = pctx.getConf();
    boolean noCheckOuterJoin = HiveConf.getBoolVar(hiveConf,
        HiveConf.ConfVars.HIVEOPTSORTMERGEBUCKETMAPJOIN)
        && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTBUCKETMAPJOIN);


    LinkedHashMap<Operator<? extends Serializable>, OpParseContext> opParseCtxMap = pctx
        .getOpParseCtx();
    MapJoinOperator mapJoinOp = convertMapJoin(opParseCtxMap, op, joinTree, mapJoinPos,
        noCheckOuterJoin);
    // create a dummy select to select all columns
    genSelectPlan(pctx, mapJoinOp);
    return mapJoinOp;
  }

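Walking the join contexts: every join that qualifies for a map-side join is converted; the rest remain common joins: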
        Map.Entry<JoinOperator, QBJoinTree> joinEntry = joinCtxIter.next();
        JoinOperator joinOp = joinEntry.getKey();
        QBJoinTree qbJoin = joinEntry.getValue();
        int mapJoinPos = mapSideJoin(joinOp, qbJoin);
        if (mapJoinPos >= 0) {
          MapJoinOperator mapJoinOp = generateMapJoinOperator(pactx, joinOp, qbJoin, mapJoinPos);
          listMapJoinOps.add(mapJoinOp);
          mapJoinMap.put(mapJoinOp, qbJoin);
        } else {
          joinMap.put(joinOp, qbJoin);
        }

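From GenMRSkewJoinProcessor: the map join is created as a child of the table scan in the new map-local plan, and the cloned join operator's children are repointed to it: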
      }

      newPlan.setMapLocalWork(localPlan);

      // construct a map join and set it as the child operator of tblScan_op
      MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory
          .getAndMakeChild(mapJoinDescriptor, (RowSchema) null, parentOps);
      // change the children of the original join operator to point to the map
      // join operator
      List<Operator<? extends Serializable>> childOps = cloneJoinOp
          .getChildOperators();
      for (Operator<? extends Serializable> childOp : childOps) {
        childOp.replaceParent(cloneJoinOp, mapJoinOp);
      }
      mapJoinOp.setChildOperators(childOps);

      HiveConf jc = new HiveConf(parseCtx.getConf(),
          GenMRSkewJoinProcessor.class);

      newPlan.setNumMapTasks(HiveConf

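A local-work processor for MAPJOIN nodes: it caps the hash table memory (with a tighter limit when a group by follows), replaces each small-table parent with a HashTableSinkOperator, short-circuits parents that can be fetched directly, and installs HashTableDummyOperators as the map join's parents: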
        throws SemanticException {
      LocalMapJoinProcCtx context = (LocalMapJoinProcCtx) ctx;
      if (!nd.getName().equals("MAPJOIN")) {
        return null;
      }
      MapJoinOperator mapJoinOp = (MapJoinOperator) nd;
      try {
        hasGroupBy(mapJoinOp, context);
      } catch (Exception e) {
        // propagate the failure: the memory sizing below depends on
        // knowing whether a group by follows this map join
        throw new SemanticException(e);
      }

      // a map join should not be affected by join reordering
      mapJoinOp.getConf().resetOrder();

      HiveConf conf = context.getParseCtx().getConf();

      HashTableSinkDesc hashTableSinkDesc = new HashTableSinkDesc(mapJoinOp.getConf());
      HashTableSinkOperator hashTableSinkOp = (HashTableSinkOperator) OperatorFactory
          .get(hashTableSinkDesc);

      // set hashtable memory usage
      float hashtableMemoryUsage;
      if (context.isFollowedByGroupBy()) {
        hashtableMemoryUsage = conf.getFloatVar(
            HiveConf.ConfVars.HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE);
      } else {
        hashtableMemoryUsage = conf.getFloatVar(
            HiveConf.ConfVars.HIVEHASHTABLEMAXMEMORYUSAGE);
      }
      mapJoinOp.getConf().setHashTableMemoryUsage(hashtableMemoryUsage);
      LOG.info("Setting max memory usage to " + hashtableMemoryUsage + " for table sink "
          + (context.isFollowedByGroupBy() ? "" : "not") + " followed by group by");
      hashTableSinkOp.getConf().setHashtableMemoryUsage(hashtableMemoryUsage);

      // get the last operator for processing big tables
      int bigTable = mapJoinOp.getConf().getPosBigTable();
      Byte[] orders = mapJoinOp.getConf().getTagOrder();

      // TODO: support tez/vectorization
      boolean useNontaged = conf.getBoolVar(
          HiveConf.ConfVars.HIVECONVERTJOINUSENONSTAGED) &&
          conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr") &&
          !conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED);

      // the parent ops for hashTableSinkOp
      List<Operator<? extends OperatorDesc>> smallTablesParentOp =
        new ArrayList<Operator<? extends OperatorDesc>>();
      List<Operator<? extends OperatorDesc>> dummyOperators =
        new ArrayList<Operator<? extends OperatorDesc>>();
      List<Operator<? extends OperatorDesc>> directOperators =
          new ArrayList<Operator<? extends OperatorDesc>>();
      // get all parents
      List<Operator<? extends OperatorDesc>> parentsOp = mapJoinOp.getParentOperators();
      for (int i = 0; i < parentsOp.size(); i++) {
        if (i == bigTable) {
          smallTablesParentOp.add(null);
          directOperators.add(null);
          continue;
        }
        Operator<? extends OperatorDesc> parent = parentsOp.get(i);
        boolean directFetchable = useNontaged &&
            (parent instanceof TableScanOperator || parent instanceof MapJoinOperator);
        if (directFetchable) {
          // no filter, no projection. no need to stage
          smallTablesParentOp.add(null);
          directOperators.add(parent);
          hashTableSinkDesc.getKeys().put(orders[i], null);
          hashTableSinkDesc.getExprs().put(orders[i], null);
          hashTableSinkDesc.getFilters().put(orders[i], null);
        } else {
          // keep the parent id correct
          smallTablesParentOp.add(parent);
          directOperators.add(null);
        }
        // let the hash table sink op be the child of this parent
        parent.replaceChild(mapJoinOp, hashTableSinkOp);
        if (directFetchable) {
          parent.setChildOperators(null);
        }

        // create a new operator: HashTableDummyOperator, which shares the table desc
        HashTableDummyDesc desc = new HashTableDummyDesc();
        HashTableDummyOperator dummyOp = (HashTableDummyOperator) OperatorFactory.get(desc);
        TableDesc tbl;

        if (parent.getSchema() == null) {
          if (parent instanceof TableScanOperator) {
            tbl = ((TableScanOperator) parent).getTableDesc();
          } else {
            throw new SemanticException("Expected parent operator of type TableScanOperator. " +
              "Found " + parent.getClass().getName() + " instead.");
          }
        } else {
          // get parent schema
          RowSchema rowSchema = parent.getSchema();
          tbl = PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(
              rowSchema, ""));
        }
        dummyOp.getConf().setTbl(tbl);
        // let the dummy op be the parent of mapjoin op
        mapJoinOp.replaceParent(parent, dummyOp);
        List<Operator<? extends OperatorDesc>> dummyChildren =
          new ArrayList<Operator<? extends OperatorDesc>>();
        dummyChildren.add(mapJoinOp);
        dummyOp.setChildOperators(dummyChildren);
        // add this dummy op to the dummy operator list

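Cloning the original work into a new MapRedTask and regenerating the map join operator for the chosen big table position; the reducer of the original plan must then be restored: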
    MapredWork newWork = Utilities.clonePlan(origWork);
    // create a mapred task for this work
    MapRedTask newTask = (MapRedTask) TaskFactory.get(newWork, physicalContext
        .getParseContext().getConf());
    // generate the map join operator; the map join has already been validated
    MapJoinOperator newMapJoinOp =
        getMapJoinOperator(newTask, newWork, smbJoinOp, joinTree, bigTablePosition);

    // The reducer needs to be restored - Consider a query like:
    // select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
    // The reducer contains a groupby, which needs to be restored.

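Bucket map join conversion delegates to the overload of MapJoinProcessor.convertMapJoin that also takes the HiveConf: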
  // Convert the join operator to a bucket map-join operator
  protected MapJoinOperator convertJoinToBucketMapJoin(
    JoinOperator joinOp,
    SortBucketJoinProcCtx joinContext,
    ParseContext parseContext) throws SemanticException {
    MapJoinOperator mapJoinOp = MapJoinProcessor.convertMapJoin(
      parseContext.getConf(),
      parseContext.getOpParseCtx(),
      joinOp,
      pGraphContext.getJoinContext().get(joinOp),
      joinContext.getBigTablePosition(),

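Sort-merge conversion builds on the previous step: first a bucket map join, then an SMBMapJoinOperator, flagged as automatically converted: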
  // Convert the join operator to a sort-merge join operator
  protected void convertJoinToSMBJoin(
    JoinOperator joinOp,
    SortBucketJoinProcCtx smbJoinContext,
    ParseContext parseContext) throws SemanticException {
    MapJoinOperator mapJoinOp = convertJoinToBucketMapJoin(joinOp, smbJoinContext, parseContext);
    SMBMapJoinOperator smbMapJoinOp =
        convertBucketMapJoinToSMBJoin(mapJoinOp, smbJoinContext, parseContext);
    smbMapJoinOp.setConvertedAutomaticallySMBJoin(true);
  }

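A node processor that tests whether a map join can be converted to a bucket map join before applying the rewrite: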
  @Override
  @SuppressWarnings("unchecked")
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
      Object... nodeOutputs) throws SemanticException {
    BucketJoinProcCtx context = (BucketJoinProcCtx) procCtx;
    MapJoinOperator mapJoinOperator = (MapJoinOperator) nd;

    // check whether this map join can be converted to a bucket map join
    boolean convert = canConvertMapJoinToBucketMapJoin(
        mapJoinOperator, pGraphContext, context);
    HiveConf conf = context.getConf();