Examples of TableScanOperator


Examples of org.apache.hadoop.hive.ql.exec.TableScanOperator

      ArrayList<String> aliases =
        mrwork.getPathToAliases().get(dir.toUri().toString());
      if ((aliases != null) && (aliases.size() == 1)) {
        Operator<?> op = mrwork.getAliasToWork().get(aliases.get(0));
        // instanceof already rejects null, so no separate null check is needed
        if (op instanceof TableScanOperator) {
          TableScanOperator tableScan = (TableScanOperator) op;
          pushFilters(newjob, tableScan);
        }
      }

      FileInputFormat.setInputPaths(newjob, dir);
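
The snippet only pushes filters when the input directory maps to exactly one alias whose operator is a table scan. Below is a minimal sketch (the class and method names are hypothetical) that factors out that lookup, assuming the older MapredWork plan API used above:

import java.util.List;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.plan.MapredWork;

public final class TableScanLookup {

  /**
   * Resolve an input directory to its TableScanOperator, but only when the
   * directory maps to exactly one alias; returns null otherwise, mirroring
   * the guard the snippet applies before calling pushFilters().
   */
  public static TableScanOperator findSingleTableScan(MapredWork mrwork, Path dir) {
    List<String> aliases = mrwork.getPathToAliases().get(dir.toUri().toString());
    if (aliases == null || aliases.size() != 1) {
      return null; // ambiguous or unknown path: nothing to push filters into
    }
    Operator<?> op = mrwork.getAliasToWork().get(aliases.get(0));
    return (op instanceof TableScanOperator) ? (TableScanOperator) op : null;
  }

  private TableScanLookup() {
  }
}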

Examples of org.apache.hadoop.hive.ql.exec.TableScanOperator

    for (String alias : aliases) {
      Operator<? extends Serializable> op = this.mrwork.getAliasToWork().get(alias);
      // instanceof already rejects null
      if (op instanceof TableScanOperator) {
        TableScanOperator tableScan = (TableScanOperator) op;

        // push down projections
        ArrayList<Integer> list = tableScan.getNeededColumnIDs();
        if (list != null) {
          ColumnProjectionUtils.appendReadColumnIDs(jobConf, list);
        } else {
          ColumnProjectionUtils.setFullyReadColumns(jobConf);
        }
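
A minimal sketch (the helper name is hypothetical) of the projection pushdown performed above, assuming the ColumnProjectionUtils API from the same Hive era:

import java.util.List;

import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.mapred.JobConf;

public final class ProjectionPushdown {

  /**
   * Copy the column IDs recorded on a TableScanOperator into the JobConf so
   * the record reader only deserializes those columns; when no list is
   * present, fall back to reading every column, as the snippet above does.
   */
  public static void pushProjection(JobConf jobConf, TableScanOperator tableScan) {
    List<Integer> neededColumnIds = tableScan.getNeededColumnIDs();
    if (neededColumnIds != null) {
      ColumnProjectionUtils.appendReadColumnIDs(jobConf, neededColumnIds);
    } else {
      ColumnProjectionUtils.setFullyReadColumns(jobConf);
    }
  }

  private ProjectionPushdown() {
  }
}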

Examples of org.apache.hadoop.hive.ql.exec.TableScanOperator

          inputs.add(new ReadEntity(tab));
        }
      } else {

        if (topOps.size() == 1) {
          TableScanOperator ts = (TableScanOperator) topOps.values().toArray()[0];

          // check if the pruner only contains partition columns
          if (PartitionPruner.onlyContainsPartnCols(topToTable.get(ts),
              opToPartPruner.get(ts))) {

            PrunedPartitionList partsList = null;
            try {
              partsList = opToPartList.get(ts);
              if (partsList == null) {
                partsList = PartitionPruner.prune(topToTable.get(ts),
                    opToPartPruner.get(ts), conf, (String) topOps.keySet()
                    .toArray()[0], prunedPartitions);
                opToPartList.put(ts, partsList);
              }
            } catch (HiveException e) {
              // Has to use full name to make sure it does not conflict with
              // org.apache.commons.lang.StringUtils
              LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
              throw new SemanticException(e.getMessage(), e);
            }

            // Only take the fetch-only path when every partition is known;
            // any unknown partition needs a map-reduce job so the filter
            // can prune correctly
            if (partsList.getUnknownPartns().isEmpty()) {
              List<String> listP = new ArrayList<String>();
              List<PartitionDesc> partP = new ArrayList<PartitionDesc>();

              Set<Partition> parts = partsList.getConfirmedPartns();
              Iterator<Partition> iterParts = parts.iterator();
              while (iterParts.hasNext()) {
                Partition part = iterParts.next();

                listP.add(part.getPartitionPath().toString());
                try {
                  partP.add(Utilities.getPartitionDesc(part));
                } catch (HiveException e) {
                  throw new SemanticException(e.getMessage(), e);
                }
                inputs.add(new ReadEntity(part));
              }

              fetch = new FetchWork(listP, partP, qb.getParseInfo()
                  .getOuterQueryLimit());
              noMapRed = true;
            }
          }
        }
      }

      if (noMapRed) {
        if (fetch.getTblDesc() != null) {
          PlanUtils.configureTableJobPropertiesForStorageHandler(
            fetch.getTblDesc());
        }
        fetchTask = (FetchTask) TaskFactory.get(fetch, conf);
        setFetchTask(fetchTask);

        // remove root tasks if any
        rootTasks.clear();
        return;
      }
    }

    // determine whether the query qualifies for reducing input size for LIMIT
    // The query only qualifies when there is exactly one top operator,
    // there is no transform script or UDTF, and no block sampling
    // is used.
    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVELIMITOPTENABLE)
        && ctx.getTryCount() == 0 && topOps.size() == 1
        && !globalLimitCtx.ifHasTransformOrUDTF() &&
        nameToSplitSample.isEmpty()) {

      // Here we recursively check:
      // 1. whether there is exactly one LIMIT in the query
      // 2. whether there is no aggregation, group-by, distinct, sort by,
      //    distribute by, or table sampling in any of the sub-queries.
      // The query only qualifies if both conditions are satisfied.
      //
      // Example qualified queries:
      //    CREATE TABLE ... AS SELECT col1, col2 FROM tbl LIMIT ..
      //    INSERT OVERWRITE TABLE ... SELECT col1, hash(col2), split(col1)
      //                               FROM ... LIMIT...
      //    SELECT * FROM (SELECT col1 AS col2 FROM (SELECT * FROM ...) t1 LIMIT ...) t2;
      //
      Integer tempGlobalLimit = checkQbpForGlobalLimit(qb);

      // the query qualifies for the optimization
      if (tempGlobalLimit != null && tempGlobalLimit != 0)  {
        TableScanOperator ts = (TableScanOperator) topOps.values().toArray()[0];
        Table tab = topToTable.get(ts);

        if (!tab.isPartitioned()) {
          if (qbParseInfo.getDestToWhereExpr().isEmpty()) {
            globalLimitCtx.enableOpt(tempGlobalLimit);
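
The example above decides that no map-reduce job is needed only when every partition is confirmed by the pruner, then builds the path and descriptor lists for a FetchWork. Below is a small sketch of that list-building step, using the same Partition/Utilities APIs (the helper name is hypothetical):

import java.util.List;
import java.util.Set;

import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;

public final class FetchPartitions {

  /**
   * Collect the paths and descriptors of the confirmed (fully pruned)
   * partitions; this is the data the snippet feeds into FetchWork when it
   * concludes the query can run without a map-reduce job.
   */
  public static void addConfirmedPartitions(Set<Partition> confirmedParts,
      List<String> partitionPaths, List<PartitionDesc> partitionDescs)
      throws SemanticException {
    for (Partition part : confirmedParts) {
      partitionPaths.add(part.getPartitionPath().toString());
      try {
        partitionDescs.add(Utilities.getPartitionDesc(part));
      } catch (HiveException e) {
        throw new SemanticException(e.getMessage(), e);
      }
    }
  }

  private FetchPartitions() {
  }
}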

Examples of org.apache.hadoop.hive.ql.exec.TableScanOperator

   * Process a node of the operator tree. This matches on the rule in IndexWhereTaskDispatcher
   */
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
                        Object... nodeOutputs) throws SemanticException {

    TableScanOperator operator = (TableScanOperator) nd;
    List<Node> opChildren = operator.getChildren();
    TableScanDesc operatorDesc = operator.getConf();
    ExprNodeDesc predicate = operatorDesc.getFilterExpr();

    IndexWhereProcCtx context = (IndexWhereProcCtx) procCtx;
    ParseContext pctx = context.getParseContext();
    LOG.info("Processing predicate for index optimization");
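
For the index optimizer above, the interesting state is the filter expression that predicate pushdown left on the table scan. Here is a minimal, hypothetical accessor for it, using only the TableScanDesc API shown in the snippet:

import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;

public final class TableScanPredicate {

  /**
   * Return the filter expression attached to a TableScanOperator by
   * predicate pushdown, or null when nothing was pushed down (which callers
   * such as the index processor should treat as "no predicate to use").
   */
  public static ExprNodeDesc getPushedPredicate(TableScanOperator tableScan) {
    TableScanDesc desc = tableScan.getConf();
    return (desc == null) ? null : desc.getFilterExpr();
  }

  private TableScanPredicate() {
  }
}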

Examples of org.apache.hadoop.hive.ql.exec.TableScanOperator

    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      LOG.info("Processing for " + nd.getName() + "("
          + ((Operator) nd).getIdentifier() + ")");
      OpWalkerInfo owi = (OpWalkerInfo) procCtx;
      TableScanOperator tsOp = (TableScanOperator) nd;
      mergeWithChildrenPred(tsOp, owi, null, null, false);
      ExprWalkerInfo pushDownPreds = owi.getPrunedPreds(tsOp);
      return createFilter(tsOp, pushDownPreds, owi);
    }
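
Processors like the one above are normally fired by a rule-based graph walker. The sketch below shows one plausible wiring, assuming Hive's org.apache.hadoop.hive.ql.lib utilities and the conventional "TS%" rule string for table scan nodes (the wrapper class and method are hypothetical):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public final class TableScanWalk {

  /**
   * Walk an operator tree and fire the given NodeProcessor on every
   * TableScanOperator ("TS") node, the way processors like the one above
   * are usually dispatched.
   */
  public static void walkTableScans(List<Operator<?>> topOps,
      NodeProcessor tableScanProc, NodeProcessorCtx procCtx) throws SemanticException {
    Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();
    rules.put(new RuleRegExp("R1", "TS%"), tableScanProc);

    Dispatcher dispatcher = new DefaultRuleDispatcher(null, rules, procCtx);
    GraphWalker walker = new DefaultGraphWalker(dispatcher);

    List<Node> startNodes = new ArrayList<Node>(topOps);
    walker.startWalking(startNodes, new HashMap<Node, Object>());
  }

  private TableScanWalk() {
  }
}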

Examples of org.apache.hadoop.hive.ql.exec.TableScanOperator

   * The Node Processor for Column Pruning on Table Scan Operators. It will store
   * needed columns in tableScanDesc.
   */
  public static class ColumnPrunerTableScanProc implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
        Object... nodeOutputs) throws SemanticException {
      TableScanOperator scanOp = (TableScanOperator) nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      List<String> cols = cppCtx.genColLists((Operator<? extends Serializable>) nd);
      cppCtx.getPrunedColLists().put((Operator<? extends Serializable>) nd, cols);
      // translate the pruned column names into positions in the scan's row schema
      ArrayList<Integer> neededColumnIds = new ArrayList<Integer>();
      RowResolver inputRR = cppCtx.getOpToParseCtxMap().get(scanOp).getRR();
      for (int i = 0; i < cols.size(); i++) {
        neededColumnIds.add(inputRR.getPosition(cols.get(i)));
      }
      scanOp.setNeededColumnIDs(neededColumnIds);
      return null;
    }
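
What ColumnPrunerTableScanProc leaves behind is the list of needed column positions on the table scan, which the input-format example earlier on this page later copies into the JobConf. A tiny illustrative check of that state (the helper is hypothetical):

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.TableScanOperator;

public final class NeededColumnsCheck {

  /**
   * Verify that, after column pruning, the table scan carries exactly the
   * expected column positions, i.e. the IDs that projection pushdown will
   * later hand to the record reader.
   */
  public static boolean scansOnly(TableScanOperator scanOp, Integer... expectedIds) {
    List<Integer> needed = scanOp.getNeededColumnIDs();
    return needed != null && needed.equals(Arrays.asList(expectedIds));
  }

  private NeededColumnsCheck() {
  }
}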

Examples of org.apache.hadoop.hive.ql.exec.TableScanOperator

      if ((sampleDescr == null) || !sampleDescr.getInputPruning()) {
        return null;
      }

      assert stack.size() == 3;
      TableScanOperator tsOp = (TableScanOperator)stack.get(0);
      ((SamplePrunerCtx)procCtx).getOpToSamplePruner().put(tsOp, sampleDescr);
      return null;
    }
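
The sample pruner relies on the rule-matched stack starting at the table scan (stack.get(0)). Below is a small defensive variant of that retrieval, with hypothetical naming:

import java.util.Stack;

import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.Node;

public final class StackHelpers {

  /**
   * In rule-matched processors like the sample pruner above, the walked
   * stack starts at the table scan; this helper makes that assumption
   * explicit and fails fast if the rule matched something else.
   */
  public static TableScanOperator tableScanAtRoot(Stack<Node> stack) {
    Node root = stack.get(0);
    if (!(root instanceof TableScanOperator)) {
      throw new IllegalStateException("expected TS at the bottom of the stack, got " + root);
    }
    return (TableScanOperator) root;
  }

  private StackHelpers() {
  }
}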

Examples of org.apache.hadoop.hive.ql.exec.TableScanOperator

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      LOG.info("Processing for " +  nd.getName() + "(" + ((Operator)nd).getIdentifier() + ")");
      OpWalkerInfo owi = (OpWalkerInfo)procCtx;
      TableScanOperator tsOp = (TableScanOperator)nd;
      mergeWithChildrenPred(tsOp, owi, null, null, false);
      ExprWalkerInfo pushDownPreds = owi.getPrunedPreds(tsOp);
      return createFilter(tsOp, pushDownPreds, owi);
    }

Examples of org.apache.hadoop.hive.ql.exec.TableScanOperator

          // "select key, count(1) from (from clustergroupbyselect key, value where ds='210') group by key, 3;",
          // even though the group by op is in a subquery, it can be changed to
          // bucket groupby.
          return;
        }
        TableScanOperator ts = (TableScanOperator) topOp;
        Table destTable = this.pGraphContext.getTopToTable().get(ts);
        if (destTable == null) {
          return;
        }
        if (!destTable.isPartitioned()) {
          List<String> bucketCols = destTable.getBucketCols();
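
The group-by rewrite above only proceeds for an unpartitioned destination table, whose bucket columns are then examined. A minimal sketch of that first check, using the Table metadata API from the snippet (the helper name is mine):

import java.util.List;

import org.apache.hadoop.hive.ql.metadata.Table;

public final class BucketGroupByCheck {

  /**
   * Return the bucket columns of an unpartitioned table, or null when the
   * table is missing or partitioned; this mirrors the guard the snippet
   * applies before matching bucket columns against the group-by keys.
   */
  public static List<String> bucketColsIfUnpartitioned(Table table) {
    if (table == null || table.isPartitioned()) {
      return null; // partitioned tables are handled per-partition elsewhere
    }
    return table.getBucketCols();
  }

  private BucketGroupByCheck() {
  }
}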

Examples of org.apache.hadoop.hive.ql.exec.TableScanOperator

        }
      }
      else {

        if (topOps.size() == 1) {
          TableScanOperator ts = (TableScanOperator)topOps.values().toArray()[0];

          // check if the pruner only contains partition columns
          if (PartitionPruner.onlyContainsPartnCols(topToTable.get(ts), opToPartPruner.get(ts))) {

            PrunedPartitionList partsList = null;