Package org.apache.hadoop.hive.ql.plan

Examples of org.apache.hadoop.hive.ql.plan.SMBJoinDesc


    SortBucketJoinProcCtx smbJoinContext,
    ParseContext parseContext) {

    String[] srcs = smbJoinContext.getSrcs();
    SMBMapJoinOperator smbJop = new SMBMapJoinOperator(mapJoinOp);
    SMBJoinDesc smbJoinDesc = new SMBJoinDesc(mapJoinOp.getConf());
    smbJop.setConf(smbJoinDesc);
    HashMap<Byte, String> tagToAlias = new HashMap<Byte, String>();
    for (int i = 0; i < srcs.length; i++) {
      tagToAlias.put((byte) i, srcs[i]);
    }
    smbJoinDesc.setTagToAlias(tagToAlias);

    int indexInListMapJoinNoReducer =
      this.pGraphContext.getListMapJoinOpsNoReducer().indexOf(mapJoinOp);
    if (indexInListMapJoinNoReducer >= 0 ) {
      this.pGraphContext.getListMapJoinOpsNoReducer().remove(indexInListMapJoinNoReducer);
      this.pGraphContext.getListMapJoinOpsNoReducer().add(indexInListMapJoinNoReducer, smbJop);
    }

    Map<String, DummyStoreOperator> aliasToSink =
        new HashMap<String, DummyStoreOperator>();
    // For all parents (other than the big table), insert a dummy store operator
    /* Consider a query like:
     *
     * select * from
     *   (subq1 --> has a filter)
     *   join
     *   (subq2 --> has a filter)
     * on some key
     *
     * Let us assume that subq1 is the small table (either specified by the user or inferred
     * automatically). The following operator tree will be created:
     *
     * TableScan (subq1) --> Select --> Filter --> DummyStore
     *                                                         \
     *                                                          \     SMBJoin
     *                                                          /
     *                                                         /
     * TableScan (subq2) --> Select --> Filter
     */

    List<Operator<? extends OperatorDesc>> parentOperators = mapJoinOp.getParentOperators();
    for (int i = 0; i < parentOperators.size(); i++) {
      Operator<? extends OperatorDesc> par = parentOperators.get(i);
      int index = par.getChildOperators().indexOf(mapJoinOp);
      par.getChildOperators().remove(index);
      if (i == smbJoinDesc.getPosBigTable()) {
        par.getChildOperators().add(index, smbJop);
      }
      else {
        DummyStoreOperator dummyStoreOp = new DummyStoreOperator();
        par.getChildOperators().add(index, dummyStoreOp);

        List<Operator<? extends OperatorDesc>> childrenOps =
            new ArrayList<Operator<? extends OperatorDesc>>();
        childrenOps.add(smbJop);
        dummyStoreOp.setChildOperators(childrenOps);

        List<Operator<? extends OperatorDesc>> parentOps =
            new ArrayList<Operator<? extends OperatorDesc>>();
        parentOps.add(par);
        dummyStoreOp.setParentOperators(parentOps);

        aliasToSink.put(srcs[i], dummyStoreOp);
        smbJop.getParentOperators().remove(i);
        smbJop.getParentOperators().add(i, dummyStoreOp);
      }
    }
    smbJoinDesc.setAliasToSink(aliasToSink);

    List<Operator<? extends OperatorDesc>> childOps = mapJoinOp.getChildOperators();
    for (int i = 0; i < childOps.size(); i++) {
      Operator<? extends OperatorDesc> child = childOps.get(i);
      int index = child.getParentOperators().indexOf(mapJoinOp);
View Full Code Here


          // Bucketing and sorting keys should exactly match
          if (!(bucketPositions.equals(sortPositions))) {
            return null;
          }
          SMBMapJoinOperator smbOp = (SMBMapJoinOperator) op;
          SMBJoinDesc smbJoinDesc = smbOp.getConf();
          int posBigTable = smbJoinDesc.getPosBigTable();

          // join keys don't match the bucketing keys
          List<ExprNodeDesc> keysBigTable = smbJoinDesc.getKeys().get((byte) posBigTable);
          if (keysBigTable.size() != bucketPositions.size()) {
            return null;
          }

          if (!validateSMBJoinKeys(smbJoinDesc, sourceTableBucketCols,
View Full Code Here

  }
 
  public VectorSMBMapJoinOperator(VectorizationContext vContext, OperatorDesc conf)
      throws HiveException {
    this();
    SMBJoinDesc desc = (SMBJoinDesc) conf;
    this.conf = desc;
   
    order = desc.getTagOrder();
    numAliases = desc.getExprs().size();
    posBigTable = (byte) desc.getPosBigTable();
    filterMaps = desc.getFilterMap();
    noOuterJoin = desc.isNoOuterJoin();

    // Must obtain vectorized equivalents for filter and value expressions

    Map<Byte, List<ExprNodeDesc>> filterExpressions = desc.getFilters();
    bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get(posBigTable),
        VectorExpressionDescriptor.Mode.FILTER);

    List<ExprNodeDesc> keyDesc = desc.getKeys().get(posBigTable);
    keyExpressions = vContext.getVectorExpressions(keyDesc);
    keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyDesc);

    Map<Byte, List<ExprNodeDesc>> exprs = desc.getExprs();
    bigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable));

    // We are making a new output vectorized row batch.
    vOutContext = new VectorizationContext(desc.getOutputColumnNames());
    vOutContext.setFileKey(vContext.getFileKey() + "/SMB_JOIN_" + desc.getBigTableAlias());
  }
View Full Code Here

        new HashMap<Task<? extends Serializable>, Set<String>>();
    // Note that pathToAlias will behave as if the original plan was a join plan
    HashMap<String, ArrayList<String>> pathToAliases = currJoinWork.getMapWork().getPathToAliases();

    // generate a map join task for the big table
    SMBJoinDesc originalSMBJoinDesc = originalSMBJoinOp.getConf();
    Byte[] order = originalSMBJoinDesc.getTagOrder();
    int numAliases = order.length;
    Set<Integer> bigTableCandidates =
        MapJoinProcessor.getBigTableCandidates(originalSMBJoinDesc.getConds());

    HashMap<String, Long> aliasToSize = new HashMap<String, Long>();
    Configuration conf = context.getConf();
    try {
      long aliasTotalKnownInputSize = getTotalKnownInputSize(context, currJoinWork.getMapWork(),
View Full Code Here

    }
    return false;
  }

  private boolean validateSMBMapJoinOperator(SMBMapJoinOperator op) {
    SMBJoinDesc desc = op.getConf();
    // Validation is the same as for map join, since the 'small' tables are not vectorized
    return validateMapJoinDesc(desc);
  }
View Full Code Here

  public static MapJoinOperator convertSMBJoinToMapJoin(HiveConf hconf,
    Map<Operator<? extends OperatorDesc>, OpParseContext> opParseCtxMap,
    SMBMapJoinOperator smbJoinOp, QBJoinTree joinTree, int bigTablePos, boolean noCheckOuterJoin)
    throws SemanticException {
    // Create a new map join operator
    SMBJoinDesc smbJoinDesc = smbJoinOp.getConf();
    List<ExprNodeDesc> keyCols = smbJoinDesc.getKeys().get(Byte.valueOf((byte) 0));
    TableDesc keyTableDesc = PlanUtils.getMapJoinKeyTableDesc(hconf, PlanUtils
        .getFieldSchemasFromColumnList(keyCols, MAPJOINKEY_FIELDPREFIX));
    MapJoinDesc mapJoinDesc = new MapJoinDesc(smbJoinDesc.getKeys(),
        keyTableDesc, smbJoinDesc.getExprs(),
        smbJoinDesc.getValueTblDescs(), smbJoinDesc.getValueTblDescs(),
        smbJoinDesc.getOutputColumnNames(),
        bigTablePos, smbJoinDesc.getConds(),
        smbJoinDesc.getFilters(), smbJoinDesc.isNoOuterJoin(), smbJoinDesc.getDumpFilePrefix());

    mapJoinDesc.setStatistics(smbJoinDesc.getStatistics());

    RowResolver joinRS = opParseCtxMap.get(smbJoinOp).getRowResolver();
    // The mapjoin has the same schema as the join operator
    MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(
        mapJoinDesc, joinRS.getRowSchema(),
View Full Code Here

  public static MapJoinOperator convertSMBJoinToMapJoin(HiveConf hconf,
    Map<Operator<? extends OperatorDesc>, OpParseContext> opParseCtxMap,
    SMBMapJoinOperator smbJoinOp, QBJoinTree joinTree, int bigTablePos, boolean noCheckOuterJoin)
    throws SemanticException {
    // Create a new map join operator
    SMBJoinDesc smbJoinDesc = smbJoinOp.getConf();
    List<ExprNodeDesc> keyCols = smbJoinDesc.getKeys().get(Byte.valueOf((byte) 0));
    TableDesc keyTableDesc = PlanUtils.getMapJoinKeyTableDesc(hconf, PlanUtils
        .getFieldSchemasFromColumnList(keyCols, MAPJOINKEY_FIELDPREFIX));
    MapJoinDesc mapJoinDesc = new MapJoinDesc(smbJoinDesc.getKeys(),
        keyTableDesc, smbJoinDesc.getExprs(),
        smbJoinDesc.getValueTblDescs(), smbJoinDesc.getValueTblDescs(),
        smbJoinDesc.getOutputColumnNames(),
        bigTablePos, smbJoinDesc.getConds(),
        smbJoinDesc.getFilters(), smbJoinDesc.isNoOuterJoin(), smbJoinDesc.getDumpFilePrefix());

    mapJoinDesc.setStatistics(smbJoinDesc.getStatistics());

    RowResolver joinRS = opParseCtxMap.get(smbJoinOp).getRowResolver();
    // The mapjoin has the same schema as the join operator
    MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(
        mapJoinDesc, joinRS.getRowSchema(),
View Full Code Here

    SortBucketJoinProcCtx smbJoinContext,
    ParseContext parseContext) {

    String[] srcs = smbJoinContext.getSrcs();
    SMBMapJoinOperator smbJop = new SMBMapJoinOperator(mapJoinOp);
    SMBJoinDesc smbJoinDesc = new SMBJoinDesc(mapJoinOp.getConf());
    smbJop.setConf(smbJoinDesc);
    HashMap<Byte, String> tagToAlias = new HashMap<Byte, String>();
    for (int i = 0; i < srcs.length; i++) {
      tagToAlias.put((byte) i, srcs[i]);
    }
    smbJoinDesc.setTagToAlias(tagToAlias);

    int indexInListMapJoinNoReducer =
      this.pGraphContext.getListMapJoinOpsNoReducer().indexOf(mapJoinOp);
    if (indexInListMapJoinNoReducer >= 0 ) {
      this.pGraphContext.getListMapJoinOpsNoReducer().remove(indexInListMapJoinNoReducer);
      this.pGraphContext.getListMapJoinOpsNoReducer().add(indexInListMapJoinNoReducer, smbJop);
    }

    Map<String, DummyStoreOperator> aliasToSink =
        new HashMap<String, DummyStoreOperator>();
    // For all parents (other than the big table), insert a dummy store operator
    /* Consider a query like:
     *
     * select * from
     *   (subq1 --> has a filter)
     *   join
     *   (subq2 --> has a filter)
     * on some key
     *
     * Let us assume that subq1 is the small table (either specified by the user or inferred
     * automatically). The following operator tree will be created:
     *
     * TableScan (subq1) --> Select --> Filter --> DummyStore
     *                                                         \
     *                                                          \     SMBJoin
     *                                                          /
     *                                                         /
     * TableScan (subq2) --> Select --> Filter
     */

    List<Operator<? extends OperatorDesc>> parentOperators = mapJoinOp.getParentOperators();
    for (int i = 0; i < parentOperators.size(); i++) {
      Operator<? extends OperatorDesc> par = parentOperators.get(i);
      int index = par.getChildOperators().indexOf(mapJoinOp);
      par.getChildOperators().remove(index);
      if (i == smbJoinDesc.getPosBigTable()) {
        par.getChildOperators().add(index, smbJop);
      }
      else {
        DummyStoreOperator dummyStoreOp = new DummyStoreOperator();
        par.getChildOperators().add(index, dummyStoreOp);

        List<Operator<? extends OperatorDesc>> childrenOps =
            new ArrayList<Operator<? extends OperatorDesc>>();
        childrenOps.add(smbJop);
        dummyStoreOp.setChildOperators(childrenOps);

        List<Operator<? extends OperatorDesc>> parentOps =
            new ArrayList<Operator<? extends OperatorDesc>>();
        parentOps.add(par);
        dummyStoreOp.setParentOperators(parentOps);

        aliasToSink.put(srcs[i], dummyStoreOp);
        smbJop.getParentOperators().remove(i);
        smbJop.getParentOperators().add(i, dummyStoreOp);
      }
    }
    smbJoinDesc.setAliasToSink(aliasToSink);

    List<Operator<? extends OperatorDesc>> childOps = mapJoinOp.getChildOperators();
    for (int i = 0; i < childOps.size(); i++) {
      Operator<? extends OperatorDesc> child = childOps.get(i);
      int index = child.getParentOperators().indexOf(mapJoinOp);
View Full Code Here

  }
 
  public VectorSMBMapJoinOperator(VectorizationContext vContext, OperatorDesc conf)
      throws HiveException {
    this();
    SMBJoinDesc desc = (SMBJoinDesc) conf;
    this.conf = desc;
   
    order = desc.getTagOrder();
    numAliases = desc.getExprs().size();
    posBigTable = (byte) desc.getPosBigTable();
    filterMaps = desc.getFilterMap();
    tagLen = desc.getTagLength();
    noOuterJoin = desc.isNoOuterJoin();

    // Must obtain vectorized equivalents for filter and value expressions

    Map<Byte, List<ExprNodeDesc>> filterExpressions = desc.getFilters();
    bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get(posBigTable),
        VectorExpressionDescriptor.Mode.FILTER);

    List<ExprNodeDesc> keyDesc = desc.getKeys().get(posBigTable);
    keyExpressions = vContext.getVectorExpressions(keyDesc);
    keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyDesc);

    Map<Byte, List<ExprNodeDesc>> exprs = desc.getExprs();
    bigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable));
   
    // Vectorized join operators need to create a new vectorization region for child operators.

    List<String> outColNames = desc.getOutputColumnNames();
   
    Map<String, Integer> mapOutCols = new HashMap<String, Integer>(outColNames.size());
   
    int outColIndex = 0;
    for(String outCol: outColNames) {
      mapOutCols.put(outCol,  outColIndex++);
    }

    vOutContext = new VectorizationContext(mapOutCols, outColIndex);
    vOutContext.setFileKey(vContext.getFileKey() + "/SMB_JOIN_" + desc.getBigTableAlias());
    this.fileKey = vOutContext.getFileKey();
  }
View Full Code Here

    }
    return ret;
  }

  private boolean validateSMBMapJoinOperator(SMBMapJoinOperator op) {
    SMBJoinDesc desc = op.getConf();
    // Validation is the same as for map join, since the 'small' tables are not vectorized
    return validateMapJoinDesc(desc);
  }
View Full Code Here

TOP

Related Classes of org.apache.hadoop.hive.ql.plan.SMBJoinDesc

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.