Package org.apache.hadoop.hive.ql.exec

Examples of org.apache.hadoop.hive.ql.exec.TableScanOperator
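The snippets below come from Hive's optimizer, planner, and input-format code, and almost all of them share the same shape: a NodeProcessor (or a similar callback) receives the visited TableScanOperator as a Node, casts it, and then reads or rewrites its TableScanDesc. The following is a minimal sketch of that shape; the class name ExampleTableScanProc and its body are illustrative only and do not appear in Hive itself, but the APIs it touches are the ones used in the snippets below.

import java.util.List;
import java.util.Stack;

import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;

public class ExampleTableScanProc implements NodeProcessor {
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
      Object... nodeOutputs) throws SemanticException {
    // The walker only dispatches this processor on table scan nodes,
    // so the cast is safe.
    TableScanOperator scanOp = (TableScanOperator) nd;
    TableScanDesc desc = scanOp.getConf();

    // Typical reads: the alias the scan was registered under, and the
    // column ids a pruner has decided the scan actually needs.
    String alias = desc.getAlias();
    List<Integer> neededColumnIds = scanOp.getNeededColumnIDs();

    // A real processor would inspect or rewrite desc here; returning null
    // lets the graph walker continue with the next node.
    return null;
  }
}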


   * store needed columns in tableScanDesc.
   */
  public static class ColumnPrunerTableScanProc implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
        Object... nodeOutputs) throws SemanticException {
      TableScanOperator scanOp = (TableScanOperator) nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      List<String> cols = cppCtx
          .genColLists((Operator<? extends Serializable>) nd);
      cppCtx.getPrunedColLists().put((Operator<? extends Serializable>) nd,
          cols);
      ArrayList<Integer> needed_columns = new ArrayList<Integer>();
      RowResolver inputRR = cppCtx.getOpToParseCtxMap().get(scanOp).getRowResolver();
      TableScanDesc desc = scanOp.getConf();
      List<VirtualColumn> virtualCols = desc.getVirtualCols();
      List<VirtualColumn> newVirtualCols = new ArrayList<VirtualColumn>();

      // add virtual columns for ANALYZE TABLE
      if(scanOp.getConf().isGatherStats()) {
        cols.add(VirtualColumn.RAWDATASIZE.getName());
      }

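      // Map every column the pruner decided to keep back to its position in
      // the scan's row schema; virtual columns are collected separately and
      // are never handed to the underlying record reader.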
      for (int i = 0; i < cols.size(); i++) {
        String[] tabCol = inputRR.reverseLookup(cols.get(i));
        if(tabCol == null) {
          continue;
        }
        ColumnInfo colInfo = inputRR.get(tabCol[0], tabCol[1]);
        if (colInfo.getIsVirtualCol()) {
          // Partition columns are also virtual columns, but they should not
          // be included in this list.
          for (int j = 0; j < virtualCols.size(); j++) {
            VirtualColumn vc = virtualCols.get(j);
            if (vc.getName().equals(colInfo.getInternalName())) {
              newVirtualCols.add(vc);
            }
          }
          //no need to pass virtual columns to reader.
          continue;
        }
        int position = inputRR.getPosition(cols.get(i));
        if (position >= 0) {
          needed_columns.add(position);
        }
      }

      desc.setVirtualCols(newVirtualCols);
      scanOp.setNeededColumnIDs(needed_columns);
      return null;
    }
View Full Code Here
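A processor like ColumnPrunerTableScanProc above is not called directly: Hive's optimizers register processors against operator-name rules and let a graph walker dispatch them over the plan. Below is a hedged sketch of that wiring under the classic org.apache.hadoop.hive.ql.lib walker API. The class ExampleTableScanWalk, the rule name "R1", and the reuse of the illustrative ExampleTableScanProc from the first sketch are assumptions; in the real code the processors are obtained from factory methods on the corresponding ProcFactory classes rather than constructed inline.

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public class ExampleTableScanWalk {
  public static void walk(ParseContext parseContext, NodeProcessorCtx procCtx)
      throws SemanticException {
    // "TS%" matches table scan nodes, whose operator name is "TS".
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("R1", "TS%"), new ExampleTableScanProc());

    // No default processor; only the rule above fires.
    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    GraphWalker walker = new DefaultGraphWalker(disp);

    // Start the walk from the top (table scan) operators of the plan.
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(parseContext.getTopOps().values());
    walker.startWalking(topNodes, null);
  }
}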


      // symbol. So we just pop out the two elements from the top and if the
      // second one of them is not a table scan then the operator on the top of
      // the stack is the Table scan operator.
      Node tmp = stack.pop();
      Node tmp2 = stack.pop();
      TableScanOperator top = null;
      if (tmp2 instanceof TableScanOperator) {
        top = (TableScanOperator) tmp2;
      } else {
        top = (TableScanOperator) stack.peek();
        fop2 = (FilterOperator) tmp2;
      }
      stack.push(tmp2);
      stack.push(tmp);

      // If fop2 exists (i.e. this is not the top-level filter) and fop2 is
      // not a sampling filter, then we ignore the current filter.
      if (fop2 != null && !fop2.getConf().getIsSamplingPred()) {
        return null;
      }

      // ignore the predicate in case it is a sampling predicate
      if (fop.getConf().getIsSamplingPred()) {
        return null;
      }

      // Otherwise this is not a sampling predicate and we need to process it.
      ExprNodeDesc predicate = fop.getConf().getPredicate();
      String alias = top.getConf().getAlias();

      // Generate the partition pruning predicate
      boolean hasNonPartCols = false;
      ExprNodeDesc ppr_pred = ExprProcFactory.genPruner(alias, predicate,
          hasNonPartCols);
View Full Code Here

      // symbol. So we just pop out the two elements from the top and if the
      // second one of them is not a table scan then the operator on the top of
      // the stack is the Table scan operator.
      Node tmp = stack.pop();
      Node tmp2 = stack.pop();
      TableScanOperator top = null;
      Operator<? extends Serializable> pop = null;
      if (tmp2 instanceof TableScanOperator) {
        top = (TableScanOperator) tmp2;
        pop = top;
      } else {
        top = (TableScanOperator) stack.peek();
        fop2 = (FilterOperator) tmp2;
        pop = fop2;
      }
      stack.push(tmp2);
      stack.push(tmp);

      // If fop2 exists (i.e. this is not the top-level filter) and fop2 is
      // not a sampling filter, then we ignore the current filter.
      if (fop2 != null && !fop2.getConf().getIsSamplingPred()) {
        return null;
      }

      // ignore the predicate in case it is a sampling predicate
      if (fop.getConf().getIsSamplingPred()) {
        return null;
      }

      if (fop.getParentOperators().size() > 1) {
        // This should not happen unless there is a bug elsewhere. But if it
        // does, we have found the wrong filter operator, so we skip the
        // optimization.
        return null;
      }


      PrunedPartitionList prunedPartList = owc.getParseContext().getOpToPartList().get(top);
      if (prunedPartList == null) {
        // We never pruned the partition. Try to prune it.
        ExprNodeDesc ppr_pred = owc.getParseContext().getOpToPartPruner().get(top);
        if (ppr_pred == null) {
          // no partition predicate found, skip.
          return null;
        }
        try {
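          // Prune against the table bound to this scan; the alias string
          // passed in is simply the first key of topOps in this context.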
          prunedPartList = PartitionPruner.prune(owc.getParseContext().getTopToTable().get(top),
              ppr_pred, owc.getParseContext().getConf(),
              (String) owc.getParseContext().getTopOps().keySet()
              .toArray()[0], owc.getParseContext().getPrunedPartitions());
          if (prunedPartList != null) {
            owc.getParseContext().getOpToPartList().put(top, prunedPartList);
          }
        } catch (HiveException e) {
          // Has to use full name to make sure it does not conflict with
          // org.apache.commons.lang.StringUtils
          LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
          throw new SemanticException(e.getMessage(), e);
        }
      }

      // Otherwise this is not a sampling predicate. We need to process it.
      ExprNodeDesc predicate = fop.getConf().getPredicate();
      String alias = top.getConf().getAlias();

      ArrayList<Partition> partitions = new ArrayList<Partition>();
      if (prunedPartList == null) {
        return null;
      }
View Full Code Here

   * @param opProcCtx
   *          context
   */
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx,
      Object... nodeOutputs) throws SemanticException {
    TableScanOperator op = (TableScanOperator) nd;
    GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
    ParseContext parseCtx = ctx.getParseCtx();
    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();

    // create a dummy MapReduce task
    MapredWork currWork = GenMapRedUtils.getMapRedWork(parseCtx);
    Task<? extends Serializable> currTask = TaskFactory.get(currWork, parseCtx.getConf());
    Operator<? extends Serializable> currTopOp = op;
    ctx.setCurrTask(currTask);
    ctx.setCurrTopOp(currTopOp);

    for (String alias : parseCtx.getTopOps().keySet()) {
      Operator<? extends Serializable> currOp = parseCtx.getTopOps().get(alias);
      if (currOp == op) {
        String currAliasId = alias;
        ctx.setCurrAliasId(currAliasId);
        mapCurrCtx.put(op, new GenMapRedCtx(currTask, currTopOp, currAliasId));

        QBParseInfo parseInfo = parseCtx.getQB().getParseInfo();
        if (parseInfo.isAnalyzeCommand()) {

          //   ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
          // The plan consists of a simple MapRedTask followed by a StatsTask.
          // The MR task is just a simple TableScanOperator

          StatsWork statsWork = new StatsWork(parseCtx.getQB().getParseInfo().getTableSpec());
          statsWork.setAggKey(op.getConf().getStatsAggPrefix());
          Task<StatsWork> statsTask = TaskFactory.get(statsWork, parseCtx.getConf());
          currTask.addDependentTask(statsTask);
          ctx.getRootTasks().add(currTask);
          currWork.setGatheringStats(true);
          // NOTE: here we should use the new partition predicate pushdown API to get the list of pruned partitions,
View Full Code Here

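        // For each top-level operator that is a TableScanOperator, translate
        // its needed column ids back into column names via the table schema.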
        for (Map.Entry<String, Operator<? extends Serializable>> topOpMap : querySem
            .getParseContext().getTopOps().entrySet()) {
          Operator<? extends Serializable> topOp = topOpMap.getValue();
          if (topOp instanceof TableScanOperator
              && tsoTopMap.containsKey(topOp)) {
            TableScanOperator tableScanOp = (TableScanOperator) topOp;
            Table tbl = tsoTopMap.get(tableScanOp);
            List<Integer> neededColumnIds = tableScanOp.getNeededColumnIDs();
            List<FieldSchema> columns = tbl.getCols();
            List<String> cols = new ArrayList<String>();
            if (neededColumnIds != null && neededColumnIds.size() > 0) {
              for (int i = 0; i < neededColumnIds.size(); i++) {
                cols.add(columns.get(neededColumnIds.get(i)).getName());
View Full Code Here

    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      LOG.info("Processing for " + nd.getName() + "("
          + ((Operator) nd).getIdentifier() + ")");
      OpWalkerInfo owi = (OpWalkerInfo) procCtx;
      TableScanOperator tsOp = (TableScanOperator) nd;
      mergeWithChildrenPred(tsOp, owi, null, null, false);
      ExprWalkerInfo pushDownPreds = owi.getPrunedPreds(tsOp);
      return createFilter(tsOp, pushDownPreds, owi);
    }
View Full Code Here

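      // Map the input directory back to its alias(es); only when the
      // directory has exactly one alias whose operator tree is rooted at a
      // TableScanOperator do we push filters down into that scan.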
      ArrayList<String> aliases =
        mrwork.getPathToAliases().get(dir.toUri().toString());
      if ((aliases != null) && (aliases.size() == 1)) {
        Operator op = mrwork.getAliasToWork().get(aliases.get(0));
        if ((op != null) && (op instanceof TableScanOperator)) {
          TableScanOperator tableScan = (TableScanOperator) op;
          pushFilters(newjob, tableScan);
        }
      }

      FileInputFormat.setInputPaths(newjob, dir);
View Full Code Here

    for (String alias : aliases) {
      Operator<? extends Serializable> op = this.mrwork.getAliasToWork().get(
          alias);
      if (op != null && op instanceof TableScanOperator) {
        TableScanOperator tableScan = (TableScanOperator) op;

        // push down projections
        ArrayList<Integer> list = tableScan.getNeededColumnIDs();
        if (list != null) {
          ColumnProjectionUtils.appendReadColumnIDs(jobConf, list);
        } else {
          ColumnProjectionUtils.setFullyReadColumns(jobConf);
        }
View Full Code Here

   *
   */
  private static class ReplaceTableScanOpProc implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
        Object... nodeOutputs) throws SemanticException {
      TableScanOperator scanOperator = (TableScanOperator)nd;
      rewriteQueryCtx = (RewriteQueryUsingAggregateIndexCtx)ctx;
      String baseTableName = rewriteQueryCtx.getBaseTableName();
      String alias = null;
      if(baseTableName.contains(":")){
        alias = (baseTableName.split(":"))[0];
      }

      //Need to remove the original TableScanOperators from these data structures
      // and add new ones
      Map<TableScanOperator, Table>  topToTable =
        rewriteQueryCtx.getParseContext().getTopToTable();
      Map<String, Operator<? extends Serializable>>  topOps =
        rewriteQueryCtx.getParseContext().getTopOps();
      Map<Operator<? extends Serializable>, OpParseContext>  opParseContext =
        rewriteQueryCtx.getParseContext().getOpParseCtx();

      //need this to set rowResolver for new scanOperator
      OpParseContext operatorContext = opParseContext.get(scanOperator);

      //remove original TableScanOperator
      topToTable.remove(scanOperator);
      topOps.remove(baseTableName);
      opParseContext.remove(scanOperator);

      //construct a new descriptor for the index table scan
      TableScanDesc indexTableScanDesc = new TableScanDesc();
      indexTableScanDesc.setGatherStats(false);

      String indexTableName = rewriteQueryCtx.getIndexName();
      Table indexTableHandle = null;
      try {
        indexTableHandle = rewriteQueryCtx.getHiveDb().getTable(indexTableName);
      } catch (HiveException e) {
        LOG.error("Error while getting the table handle for index table.");
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
      }

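      // Stats aggregation key prefix for the new scan: the index table name
      // plus a path separator.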
      String k = indexTableName + Path.SEPARATOR;
      indexTableScanDesc.setStatsAggPrefix(k);
      scanOperator.setConf(indexTableScanDesc);

      //Construct the new RowResolver for the new TableScanOperator
      RowResolver rr = new RowResolver();
      try {
        StructObjectInspector rowObjectInspector =
          (StructObjectInspector) indexTableHandle.getDeserializer().getObjectInspector();
        List<? extends StructField> fields = rowObjectInspector
        .getAllStructFieldRefs();
        for (int i = 0; i < fields.size(); i++) {
          rr.put(indexTableName, fields.get(i).getFieldName(), new ColumnInfo(fields
              .get(i).getFieldName(), TypeInfoUtils
              .getTypeInfoFromObjectInspector(fields.get(i)
                  .getFieldObjectInspector()), indexTableName, false));
        }
      } catch (SerDeException e) {
        LOG.error("Error while creating the RowResolver for new TableScanOperator.");
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
      }

      //Set row resolver for new table
      operatorContext.setRowResolver(rr);
      String tabNameWithAlias = null;
      if(alias != null){
        tabNameWithAlias = alias + ":" + indexTableName;
      } else {
        tabNameWithAlias = indexTableName;
      }

      //Scan operator now points to other table
      topToTable.put(scanOperator, indexTableHandle);
      scanOperator.getConf().setAlias(tabNameWithAlias);
      scanOperator.setAlias(indexTableName);
      topOps.put(tabNameWithAlias, scanOperator);
      opParseContext.put(scanOperator, operatorContext);
      rewriteQueryCtx.getParseContext().setTopToTable(
          (HashMap<TableScanOperator, Table>) topToTable);
      rewriteQueryCtx.getParseContext().setTopOps(
View Full Code Here

          // from (from clustergroupby select key, value where ds='210') group by key, 3;",
          // even though the group by op is in a subquery, it can be changed to
          // bucket groupby.
          return;
        }
        TableScanOperator ts = (TableScanOperator) topOp;
        Table destTable = pGraphContext.getTopToTable().get(ts);
        if (destTable == null) {
          return;
        }
        if (!destTable.isPartitioned()) {
View Full Code Here


