Package org.apache.hadoop.hive.ql.plan

Examples of org.apache.hadoop.hive.ql.plan.JoinDesc
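The snippets below are drawn from the Apache Hive source tree. JoinDesc is the plan descriptor attached to join operators: it carries the per-input-tag value expressions (getExprs), the join conditions (getConds), residual filter expressions (getFilters), the output column names, the tag order, and the skew-join bookkeeping that several of the examples manipulate.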


    noOuterJoin = joinOp.noOuterJoin;
  }

  public void initiliaze(Configuration hconf) {
    this.hconf = hconf;
    JoinDesc desc = joinOp.getConf();
    skewKeyDefinition = desc.getSkewKeyDefinition();
    skewKeysTableObjectInspector = new HashMap<Byte, StructObjectInspector>(
        numAliases);
    tblDesc = desc.getSkewKeysValuesTables();
    tblSerializers = new HashMap<Byte, SerDe>(numAliases);
    bigKeysExistingMap = new HashMap<Byte, Boolean>(numAliases);
    taskId = Utilities.getTaskId(hconf);

    int[][] filterMap = desc.getFilterMap();
    for (int i = 0; i < numAliases; i++) {
      Byte alias = desc.getTagOrder()[i];
      List<ObjectInspector> skewTableKeyInspectors = new ArrayList<ObjectInspector>();
      StructObjectInspector soi = (StructObjectInspector) joinOp.inputObjInspectors[alias];
      StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY
          .toString());
      List<? extends StructField> keyFields = ((StructObjectInspector) sf
          .getFieldObjectInspector()).getAllStructFieldRefs();
      int keyFieldSize = keyFields.size();
      for (int k = 0; k < keyFieldSize; k++) {
        skewTableKeyInspectors.add(keyFields.get(k).getFieldObjectInspector());
      }
      TableDesc joinKeyDesc = desc.getKeyTableDesc();
      List<String> keyColNames = Utilities.getColumnNames(joinKeyDesc
          .getProperties());
      StructObjectInspector structTblKeyInspector = ObjectInspectorFactory
          .getStandardStructObjectInspector(keyColNames, skewTableKeyInspectors);
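This fragment, from the skew join handler, collects one ObjectInspector per join-key column and then asks ObjectInspectorFactory for a standard struct inspector over them. Below is a minimal, self-contained sketch of that last step; the column names and primitive inspectors are invented stand-ins for the ones the handler pulls from the reduce-side KEY struct.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class SkewKeyInspectorSketch {
  public static void main(String[] args) {
    // Stand-ins for the key column names read from the key TableDesc properties.
    List<String> keyColNames = Arrays.asList("joinkey0", "joinkey1");
    // Stand-ins for the per-column inspectors harvested from the reduce-side KEY struct.
    List<ObjectInspector> keyInspectors = Arrays.<ObjectInspector>asList(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        PrimitiveObjectInspectorFactory.javaIntObjectInspector);
    StructObjectInspector structTblKeyInspector =
        ObjectInspectorFactory.getStandardStructObjectInspector(keyColNames, keyInspectors);
    System.out.println(structTblKeyInspector.getTypeName());
    // prints: struct<joinkey0:string,joinkey1:int>
  }
}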


      throw new SemanticException(e.getMessage());
    }

    BigTableSelectorForAutoSMJ bigTableMatcher =
        ReflectionUtils.newInstance(bigTableMatcherClass, null);
    JoinDesc joinDesc = joinOp.getConf();
    JoinCondDesc[] joinCondns = joinDesc.getConds();
    Set<Integer> joinCandidates = MapJoinProcessor.getBigTableCandidates(joinCondns);
    if (joinCandidates.isEmpty()) {
      // This is a full outer join. This can never be a map-join
      // of any type. So return false.
      return false;
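For context, MapJoinProcessor.getBigTableCandidates works purely off the join-type constants stored in each JoinCondDesc, and a full outer join yields an empty candidate set, which is exactly what the early return above tests. A hedged, standalone illustration (the printed sets reflect my reading of the rule, not a verified trace):

import java.util.Set;

import org.apache.hadoop.hive.ql.optimizer.MapJoinProcessor;
import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
import org.apache.hadoop.hive.ql.plan.JoinDesc;

public class BigTableCandidateSketch {
  public static void main(String[] args) {
    // Inner join of inputs 0 and 1: either side may be streamed as the big table.
    JoinCondDesc[] inner = { new JoinCondDesc(0, 1, JoinDesc.INNER_JOIN) };
    Set<Integer> c1 = MapJoinProcessor.getBigTableCandidates(inner);
    System.out.println(c1); // expected: both positions, e.g. [0, 1]

    // Full outer join: neither side can be fully hashed away, so no candidates.
    JoinCondDesc[] full = { new JoinCondDesc(0, 1, JoinDesc.FULL_OUTER_JOIN) };
    Set<Integer> c2 = MapJoinProcessor.getBigTableCandidates(full);
    System.out.println(c2); // expected: []
  }
}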

    MapJoinDesc mapJoinDesc = null;
    if (adjustParentsChildren) {
        mapJoinDesc = MapJoinProcessor.getMapJoinDesc(context.conf, parseContext.getOpParseCtx(),
            joinOp, parseContext.getJoinContext().get(joinOp), mapJoinConversionPos, true);
    } else {
      JoinDesc joinDesc = joinOp.getConf();
      // retain the original join desc in the map join.
      mapJoinDesc =
          new MapJoinDesc(null, null, joinDesc.getExprs(), null, null,
              joinDesc.getOutputColumnNames(), mapJoinConversionPos, joinDesc.getConds(),
              joinDesc.getFilters(), joinDesc.getNoOuterJoin(), null);
      mapJoinDesc.setNullSafes(joinDesc.getNullSafes());
      mapJoinDesc.setFilterMap(joinDesc.getFilterMap());
      mapJoinDesc.resetOrder();
    }

    @SuppressWarnings("unchecked")
    CommonMergeJoinOperator mergeJoinOp =
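The "retain the original join desc" branch is worth isolating: only the expressions, conditions, filters, output names, null-safes, and filter map survive into the MapJoinDesc, while the map-join-specific key/value table descriptors stay null. A hedged extraction of that branch into a helper, with the argument order copied from the call above and my own reading of resetOrder in the comment:

import org.apache.hadoop.hive.ql.plan.JoinDesc;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;

public class RetainJoinDescSketch {
  // bigTablePos is the position chosen for the streamed (big) side.
  static MapJoinDesc retainAsMapJoin(JoinDesc joinDesc, int bigTablePos) {
    // Nulls stand where the map-join key/value table descriptors would go;
    // they are not needed when the reduce-side JoinDesc is retained verbatim.
    MapJoinDesc mapJoinDesc = new MapJoinDesc(null, null, joinDesc.getExprs(), null, null,
        joinDesc.getOutputColumnNames(), bigTablePos, joinDesc.getConds(),
        joinDesc.getFilters(), joinDesc.getNoOuterJoin(), null);
    mapJoinDesc.setNullSafes(joinDesc.getNullSafes());
    mapJoinDesc.setFilterMap(joinDesc.getFilterMap());
    mapJoinDesc.resetOrder(); // clear ordering carried over from the reduce-side join
    return mapJoinDesc;
  }
}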

      throw new SemanticException(e.getMessage());
    }

    BigTableSelectorForAutoSMJ bigTableMatcher =
      (BigTableSelectorForAutoSMJ) ReflectionUtils.newInstance(bigTableMatcherClass, null);
    JoinDesc joinDesc = joinOp.getConf();
    JoinCondDesc[] joinCondns = joinDesc.getConds();
    Set<Integer> joinCandidates = MapJoinProcessor.getBigTableCandidates(joinCondns);
    if (joinCandidates.isEmpty()) {
      // This is a full outer join. This can never be a map-join
      // of any type. So return false.
      return false;

      assert(!stack.isEmpty());

      // LineageCtx
      LineageCtx lCtx = (LineageCtx) procCtx;
      JoinOperator op = (JoinOperator)nd;
      JoinDesc jd = op.getConf();

      // The input operator to the join is always a reduce sink operator
      ReduceSinkOperator inpOp = (ReduceSinkOperator)getParent(stack);
      ReduceSinkDesc rd = inpOp.getConf();
      int tag = rd.getTag();

      // Iterate over the outputs of the join operator and merge the
      // dependencies of the columns that correspond to the tag.
      int cnt = 0;
      List<ExprNodeDesc> exprs = jd.getExprs().get((byte)tag);
      for(ColumnInfo ci : op.getSchema().getSignature()) {
        if (jd.getReversedExprs().get(ci.getInternalName()) != tag) {
          continue;
        }

        // Otherwise look up the expression corresponding to this ci
        ExprNodeDesc expr = exprs.get(cnt++);
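Two JoinDesc accessors do the heavy lifting in this lineage rule: getExprs keys the value expressions by input tag, and getReversedExprs maps each internal output column name back to the tag that produced it. A hedged helper showing the same pairing, assuming the caller supplies the join's output column names:

import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.JoinDesc;

public class JoinLineageSketch {
  static void printColumnsForTag(JoinDesc jd, List<String> outputColNames, byte tag) {
    List<ExprNodeDesc> exprs = jd.getExprs().get(tag);
    Map<String, Byte> reversed = jd.getReversedExprs();
    int cnt = 0;
    for (String internalName : outputColNames) {
      Byte srcTag = reversed.get(internalName);
      if (srcTag == null || srcTag != tag) {
        continue; // this output column originates from a different join input
      }
      // Columns for one tag appear in expression order, so a running counter suffices.
      System.out.println(internalName + " <- " + exprs.get(cnt++).getExprString());
    }
  }
}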


  public static class ConstantPropagateJoinProc implements NodeProcessor {
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      JoinOperator op = (JoinOperator) nd;
      JoinDesc conf = op.getConf();
      ConstantPropagateProcCtx cppCtx = (ConstantPropagateProcCtx) ctx;
      Map<ColumnInfo, ExprNodeDesc> constants = cppCtx.getPropagatedConstants(op);
      cppCtx.getOpToConstantExprs().put(op, constants);
      if (constants.isEmpty()) {
        return null;
      }

      // Note: the following code (removing folded constants from the exprs) is deeply
      //    coupled with the ColumnPruner optimizer.
      // We assume ColumnPruner will remove the constant columns, so output columns are
      //    not handled here. The one exception is when the join operator is followed by
      //    a redistribution (an RS operator).
      if (op.getChildOperators().size() == 1
          && op.getChildOperators().get(0) instanceof ReduceSinkOperator) {
        LOG.debug("Skip JOIN-RS structure.");
        return null;
      }
      LOG.info("Old exprs " + conf.getExprs());
      Iterator<Entry<Byte, List<ExprNodeDesc>>> itr = conf.getExprs().entrySet().iterator();
      while (itr.hasNext()) {
        Entry<Byte, List<ExprNodeDesc>> e = itr.next();
        int tag = e.getKey();
        List<ExprNodeDesc> exprs = e.getValue();
        if (exprs == null) {
          continue;
        }
        List<ExprNodeDesc> newExprs = new ArrayList<ExprNodeDesc>();
        for (ExprNodeDesc expr : exprs) {
          ExprNodeDesc newExpr = foldExpr(expr, constants, cppCtx, op, tag, false);
          if (newExpr instanceof ExprNodeConstantDesc || newExpr instanceof ExprNodeNullDesc) {
            LOG.info("expr " + newExpr + " fold from " + expr + " is removed.");
            continue;
          }
          newExprs.add(newExpr);
        }
        e.setValue(newExprs);
      }
      LOG.info("New exprs " + conf.getExprs());

      for (List<ExprNodeDesc> v : conf.getFilters().values()) {
        for (int i = 0; i < v.size(); i++) {
          ExprNodeDesc expr = foldExpr(v.get(i), constants, cppCtx, op, 0, false);
          v.set(i, expr);
        }
      }
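The rewrite above mutates the JoinDesc in place: value expressions that folded to constants are dropped outright (relying on ColumnPruner to remove the matching output columns), while filter expressions are folded but kept. A minimal sketch of just the pruning decision:

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

public class FoldedExprPruneSketch {
  static List<ExprNodeDesc> dropFoldedConstants(List<ExprNodeDesc> exprs) {
    List<ExprNodeDesc> kept = new ArrayList<ExprNodeDesc>();
    for (ExprNodeDesc expr : exprs) {
      if (expr instanceof ExprNodeConstantDesc) {
        // Folded to a constant: drop it here and rely on ColumnPruner to drop
        // the corresponding output column downstream.
        continue;
      }
      kept.add(expr);
    }
    return kept;
  }
}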

    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {

      BucketingSortingCtx bctx = (BucketingSortingCtx)procCtx;
      JoinOperator jop = (JoinOperator)nd;
      JoinDesc joinDesc = jop.getConf();

      Byte[] order = joinDesc.getTagOrder();
      Map<Byte, List<ExprNodeDesc>> expressions = joinDesc.getExprs();
      List<String> outputValNames = joinDesc.getOutputColumnNames();

      BucketCol[] newBucketCols = null;
      SortCol[] newSortCols = null;

      for (int i = 0; i < jop.getParentOperators().size(); i++) {
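The per-parent loop that is cut off above pairs parent position i with getTagOrder()[i] and then looks up that parent's value expressions in getExprs. A hedged sketch of that bookkeeping, with illustrative output:

import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.JoinDesc;

public class JoinTagOrderSketch {
  static void printParentContributions(JoinDesc joinDesc) {
    Byte[] order = joinDesc.getTagOrder();
    Map<Byte, List<ExprNodeDesc>> expressions = joinDesc.getExprs();
    for (int i = 0; i < order.length; i++) {
      List<ExprNodeDesc> parentExprs = expressions.get(order[i]);
      int n = (parentExprs == null) ? 0 : parentExprs.size();
      System.out.println("parent " + i + " (tag " + order[i] + ") contributes "
          + n + " value columns");
    }
  }
}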

    Task<? extends Serializable> child =
        children != null && children.size() == 1 ? children.get(0) : null;

    Path baseTmpDir = parseCtx.getContext().getMRTmpPath();

    JoinDesc joinDescriptor = joinOp.getConf();
    Map<Byte, List<ExprNodeDesc>> joinValues = joinDescriptor.getExprs();
    int numAliases = joinValues.size();

    Map<Byte, Path> bigKeysDirMap = new HashMap<Byte, Path>();
    Map<Byte, Map<Byte, Path>> smallKeysDirMap = new HashMap<Byte, Map<Byte, Path>>();
    Map<Byte, Path> skewJoinJobResultsDir = new HashMap<Byte, Path>();
    Byte[] tags = joinDescriptor.getTagOrder();
    for (int i = 0; i < numAliases; i++) {
      Byte alias = tags[i];
      bigKeysDirMap.put(alias, getBigKeysDir(baseTmpDir, alias));
      Map<Byte, Path> smallKeysMap = new HashMap<Byte, Path>();
      smallKeysDirMap.put(alias, smallKeysMap);
      for (Byte src2 : tags) {
        if (!src2.equals(alias)) {
          smallKeysMap.put(src2, getSmallKeysDir(baseTmpDir, alias, src2));
        }
      }
      skewJoinJobResultsDir.put(alias, getBigKeysSkewJoinResultDir(baseTmpDir,
          alias));
    }

    joinDescriptor.setHandleSkewJoin(true);
    joinDescriptor.setBigKeysDirMap(bigKeysDirMap);
    joinDescriptor.setSmallKeysDirMap(smallKeysDirMap);
    joinDescriptor.setSkewKeyDefinition(HiveConf.getIntVar(parseCtx.getConf(),
        HiveConf.ConfVars.HIVESKEWJOINKEY));

    HashMap<Path, Task<? extends Serializable>> bigKeysDirToTaskMap =
      new HashMap<Path, Task<? extends Serializable>>();
    List<Serializable> listWorks = new ArrayList<Serializable>();
    List<Task<? extends Serializable>> listTasks = new ArrayList<Task<? extends Serializable>>();
    MapredWork currPlan = (MapredWork) currTask.getWork();

    TableDesc keyTblDesc = (TableDesc) currPlan.getReduceWork().getKeyDesc().clone();
    List<String> joinKeys = Utilities
        .getColumnNames(keyTblDesc.getProperties());
    List<String> joinKeyTypes = Utilities.getColumnTypes(keyTblDesc
        .getProperties());

    Map<Byte, TableDesc> tableDescList = new HashMap<Byte, TableDesc>();
    Map<Byte, RowSchema> rowSchemaList = new HashMap<Byte, RowSchema>();
    Map<Byte, List<ExprNodeDesc>> newJoinValues = new HashMap<Byte, List<ExprNodeDesc>>();
    Map<Byte, List<ExprNodeDesc>> newJoinKeys = new HashMap<Byte, List<ExprNodeDesc>>();
    // used to create the MapJoinDesc; entries must be in tag order
    List<TableDesc> newJoinValueTblDesc = new ArrayList<TableDesc>();

    for (Byte tag : tags) {
      newJoinValueTblDesc.add(null);
    }

    for (int i = 0; i < numAliases; i++) {
      Byte alias = tags[i];
      List<ExprNodeDesc> valueCols = joinValues.get(alias);
      String colNames = "";
      String colTypes = "";
      int columnSize = valueCols.size();
      List<ExprNodeDesc> newValueExpr = new ArrayList<ExprNodeDesc>();
      List<ExprNodeDesc> newKeyExpr = new ArrayList<ExprNodeDesc>();
      ArrayList<ColumnInfo> columnInfos = new ArrayList<ColumnInfo>();

      boolean first = true;
      for (int k = 0; k < columnSize; k++) {
        TypeInfo type = valueCols.get(k).getTypeInfo();
        String newColName = i + "_VALUE_" + k; // any name will do here.
        ColumnInfo columnInfo = new ColumnInfo(newColName, type, alias.toString(), false);
        columnInfos.add(columnInfo);
        newValueExpr.add(new ExprNodeColumnDesc(columnInfo));
        if (!first) {
          colNames = colNames + ",";
          colTypes = colTypes + ",";
        }
        first = false;
        colNames = colNames + newColName;
        colTypes = colTypes + valueCols.get(k).getTypeString();
      }

      // the join keys are placed at the end of the spilled table
      for (int k = 0; k < joinKeys.size(); k++) {
        if (!first) {
          colNames = colNames + ",";
          colTypes = colTypes + ",";
        }
        first = false;
        colNames = colNames + joinKeys.get(k);
        colTypes = colTypes + joinKeyTypes.get(k);
        ColumnInfo columnInfo = new ColumnInfo(joinKeys.get(k), TypeInfoFactory
            .getPrimitiveTypeInfo(joinKeyTypes.get(k)), alias.toString(), false);
        columnInfos.add(columnInfo);
        newKeyExpr.add(new ExprNodeColumnDesc(columnInfo));
      }

      newJoinValues.put(alias, newValueExpr);
      newJoinKeys.put(alias, newKeyExpr);
      tableDescList.put(alias, Utilities.getTableDesc(colNames, colTypes));
      rowSchemaList.put(alias, new RowSchema(columnInfos));

      // construct value table Desc
      String valueColNames = "";
      String valueColTypes = "";
      first = true;
      for (int k = 0; k < columnSize; k++) {
        String newColName = i + "_VALUE_" + k; // any name will do here.
        if (!first) {
          valueColNames = valueColNames + ",";
          valueColTypes = valueColTypes + ",";
        }
        valueColNames = valueColNames + newColName;
        valueColTypes = valueColTypes + valueCols.get(k).getTypeString();
        first = false;
      }
      newJoinValueTblDesc.set(i, Utilities.getTableDesc(valueColNames, valueColTypes));
    }

    joinDescriptor.setSkewKeysValuesTables(tableDescList);
    joinDescriptor.setKeyTableDesc(keyTblDesc);

    for (int i = 0; i < numAliases - 1; i++) {
      Byte src = tags[i];
      MapWork newPlan = PlanUtils.getMapRedWork().getMapWork();

      // This code has only been added for testing
      boolean mapperCannotSpanPartns =
        parseCtx.getConf().getBoolVar(
          HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS);
      newPlan.setMapperCannotSpanPartns(mapperCannotSpanPartns);

      MapredWork clonePlan = Utilities.clonePlan(currPlan);

      Operator<? extends OperatorDesc>[] parentOps = new TableScanOperator[tags.length];
      for (int k = 0; k < tags.length; k++) {
        Operator<? extends OperatorDesc> ts =
            GenMapRedUtils.createTemporaryTableScanOperator(rowSchemaList.get((byte)k));
        ((TableScanOperator)ts).setTableDesc(tableDescList.get((byte)k));
        parentOps[k] = ts;
      }
      Operator<? extends OperatorDesc> tblScan_op = parentOps[i];

      ArrayList<String> aliases = new ArrayList<String>();
      String alias = src.toString();
      aliases.add(alias);
      Path bigKeyDirPath = bigKeysDirMap.get(src);
      newPlan.getPathToAliases().put(bigKeyDirPath.toString(), aliases);

      newPlan.getAliasToWork().put(alias, tblScan_op);
      PartitionDesc part = new PartitionDesc(tableDescList.get(src), null);

      newPlan.getPathToPartitionInfo().put(bigKeyDirPath.toString(), part);
      newPlan.getAliasToPartnInfo().put(alias, part);

      Operator<? extends OperatorDesc> reducer = clonePlan.getReduceWork().getReducer();
      assert reducer instanceof JoinOperator;
      JoinOperator cloneJoinOp = (JoinOperator) reducer;

      String dumpFilePrefix = "mapfile" + PlanUtils.getCountForMapJoinDumpFilePrefix();
      MapJoinDesc mapJoinDescriptor = new MapJoinDesc(newJoinKeys, keyTblDesc,
          newJoinValues, newJoinValueTblDesc, newJoinValueTblDesc,
          joinDescriptor.getOutputColumnNames(), i, joinDescriptor.getConds(),
          joinDescriptor.getFilters(), joinDescriptor.getNoOuterJoin(), dumpFilePrefix);
      mapJoinDescriptor.setTagOrder(tags);
      mapJoinDescriptor.setHandleSkewJoin(false);
      mapJoinDescriptor.setNullSafes(joinDescriptor.getNullSafes());

      MapredLocalWork localPlan = new MapredLocalWork(
          new LinkedHashMap<String, Operator<? extends OperatorDesc>>(),
          new LinkedHashMap<String, FetchWork>());
      Map<Byte, Path> smallTblDirs = smallKeysDirMap.get(src);
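The directory bookkeeping at the top of this example is the heart of the skew join plan: one big-keys directory per alias, plus one small-keys directory for every (alias, other-alias) pair. A self-contained sketch of that layout; the scratch root and path names are invented, not Hive's actual getBigKeysDir/getSmallKeysDir naming scheme:

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.fs.Path;

public class SkewDirLayoutSketch {
  public static void main(String[] args) {
    Byte[] tags = { 0, 1, 2 };                 // one tag per join alias
    Path base = new Path("/tmp/hive-scratch"); // invented scratch root
    Map<Byte, Path> bigKeysDirMap = new HashMap<Byte, Path>();
    Map<Byte, Map<Byte, Path>> smallKeysDirMap = new HashMap<Byte, Map<Byte, Path>>();
    for (Byte alias : tags) {
      // Rows of `alias` carrying skewed keys are spilled here...
      bigKeysDirMap.put(alias, new Path(base, "bigkeys-" + alias));
      // ...and every other alias spills its rows for those keys alongside.
      Map<Byte, Path> small = new HashMap<Byte, Path>();
      for (Byte other : tags) {
        if (!other.equals(alias)) {
          small.put(other, new Path(base, "smallkeys-" + alias + "-" + other));
        }
      }
      smallKeysDirMap.put(alias, small);
    }
    System.out.println(bigKeysDirMap);
    System.out.println(smallKeysDirMap);
  }
}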

    // get parseCtx for this Join Operator
    ParseContext parseCtx = physicalContext.getParseContext();
    QBJoinTree joinTree = parseCtx.getJoinContext().get(joinOp);

    // start to generate multiple map join tasks
    JoinDesc joinDesc = joinOp.getConf();

    if (aliasToSize == null) {
      aliasToSize = new HashMap<String, Long>();
    }

    try {
      long aliasTotalKnownInputSize =
          getTotalKnownInputSize(context, currWork, pathToAliases, aliasToSize);

      Set<Integer> bigTableCandidates = MapJoinProcessor.getBigTableCandidates(joinDesc
          .getConds());

      // no table could be the big table; there is no need to convert
      if (bigTableCandidates.isEmpty()) {
        return null;
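Once the candidate set and the per-alias sizes are known, one plausible final step is to pick the candidate with the largest known input size as the streamed table. The sketch below is my own simplification of that selection; the map keying and method name are invented, not the resolver's actual signature:

import java.util.Map;
import java.util.Set;

public class BigTablePickSketch {
  static int pickBiggestCandidate(Set<Integer> candidates, Map<Integer, Long> posToKnownSize) {
    int best = -1;
    long bestSize = -1L;
    for (int pos : candidates) {
      Long size = posToKnownSize.get(pos);
      if (size != null && size.longValue() > bestSize) {
        bestSize = size.longValue();
        best = pos;
      }
    }
    return best; // -1 when no candidate has a known size
  }
}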
