Examples of TableScanOperator


Examples of org.apache.hadoop.hive.ql.exec.TableScanOperator

      // Resolve this input's join keys, then walk from topOp back to the root
      // TableScanOperator that produces them.
      List<String> keys = toColumns(keysMap.get((byte) index));
      if (keys == null || keys.isEmpty()) {
        return false;
      }
      int oldKeySize = keys.size();
      TableScanOperator tso = TableAccessAnalyzer.genRootTableScan(topOp, keys);
      if (tso == null) {
        // We cannot get to the root TableScan operator, likely because there is
        // a join or group-by between topOp and the root TableScan operator.
        // We don't handle that case, so simply return false.
        return false;
      }
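The toColumns helper is outside this excerpt. A minimal sketch of what it could look like, assuming the join keys arrive as ExprNodeDesc instances and only bare column references (ExprNodeColumnDesc) can be mapped back to scan columns; the helper itself is hypothetical:

      // Hypothetical helper: map key expressions to column names, or return
      // null when any key is not a simple column reference.
      private static List<String> toColumns(List<ExprNodeDesc> keys) {
        List<String> columns = new ArrayList<String>();
        for (ExprNodeDesc expr : keys) {
          if (!(expr instanceof ExprNodeColumnDesc)) {
            return null; // expression key: cannot map to a single column
          }
          columns.add(((ExprNodeColumnDesc) expr).getColumn());
        }
        return columns;
      }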

Examples of org.apache.hadoop.hive.ql.exec.TableScanOperator

      // Report how many metadata-only table scans were found, then rewrite
      // each one's alias in the map work so its input need not be read.
      LOG.info(String.format("Found %d metadata only table scans",
          walkerCtx.getMetadataOnlyTableScans().size()));
      Iterator<TableScanOperator> iterator
        = walkerCtx.getMetadataOnlyTableScans().iterator();

      while (iterator.hasNext()) {
        TableScanOperator tso = iterator.next();
        MapWork work = ((MapredWork) task.getWork()).getMapWork();
        String alias = getAliasForTableScanOperator(work, tso);
        LOG.info("Metadata only table scan for " + alias);
        processAlias(work, alias);
      }
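getAliasForTableScanOperator is not shown in this excerpt. A minimal sketch of such a lookup, assuming each alias in the MapWork's aliasToWork map (used in the later examples) points at the root operator of its tree; the helper body is an assumption, not the actual Hive implementation:

      // Hypothetical helper: find the alias whose operator tree is rooted at
      // the given TableScanOperator.
      private static String getAliasForTableScanOperator(MapWork work,
          TableScanOperator tso) {
        for (Map.Entry<String, Operator<? extends OperatorDesc>> entry
            : work.getAliasToWork().entrySet()) {
          if (entry.getValue() == tso) { // identity: the scan is the tree root
            return entry.getKey();
          }
        }
        return null; // no alias maps to this scan
      }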

Examples of org.apache.hadoop.hive.ql.exec.TableScanOperator

    plan.setNameToSplitSample(parseCtx.getNameToSplitSample());

    // If no partition list was supplied, run the partition pruner against the
    // root TableScanOperator for this alias.
    if (partsList == null) {
      try {
        TableScanOperator tsOp = (TableScanOperator) topOp;
        partsList = PartitionPruner.prune(tsOp, parseCtx, alias_id);
      } catch (SemanticException e) {
        throw e;
      } catch (HiveException e) {
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
      }
    }

Examples of org.apache.hadoop.hive.ql.exec.TableScanOperator

      // If exactly one alias reads this directory and its operator tree is
      // rooted at a TableScanOperator, push the scan's filters into the job.
      ArrayList<String> aliases =
          mrwork.getPathToAliases().get(dir.toUri().toString());
      if ((aliases != null) && (aliases.size() == 1)) {
        Operator op = mrwork.getAliasToWork().get(aliases.get(0));
        if (op instanceof TableScanOperator) {
          TableScanOperator tableScan = (TableScanOperator) op;
          pushFilters(newjob, tableScan);
        }
      }

      FileInputFormat.setInputPaths(newjob, dir);
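pushFilters itself is defined in HiveInputFormat rather than in this excerpt. A rough sketch of what it does, assuming this Hive version exposes TableScanDesc.FILTER_TEXT_CONF_STR / FILTER_EXPR_CONF_STR and Utilities.serializeExpression; treat the exact signatures as assumptions:

      // Sketch: copy the scan's residual filter expression into the job conf
      // so storage handlers and record readers can evaluate it.
      public static void pushFilters(JobConf jobConf, TableScanOperator tableScan) {
        TableScanDesc scanDesc = tableScan.getConf();
        if (scanDesc == null) {
          return;
        }
        ExprNodeDesc filterExpr = scanDesc.getFilterExpr();
        if (filterExpr == null) {
          return;
        }
        jobConf.set(TableScanDesc.FILTER_TEXT_CONF_STR, filterExpr.getExprString());
        jobConf.set(TableScanDesc.FILTER_EXPR_CONF_STR,
            Utilities.serializeExpression(filterExpr));
      }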

Examples of org.apache.hadoop.hive.ql.exec.TableScanOperator

    // For every alias in this job, push the column projections and filter
    // predicates recorded on its TableScanOperator down into the job conf.
    for (String alias : aliases) {
      Operator<? extends OperatorDesc> op = this.mrwork.getAliasToWork().get(
          alias);
      if (op instanceof TableScanOperator) {
        TableScanOperator tableScan = (TableScanOperator) op;

        // push down projections
        ArrayList<Integer> list = tableScan.getNeededColumnIDs();
        if (list != null) {
          ColumnProjectionUtils.appendReadColumnIDs(jobConf, list);
        } else {
          ColumnProjectionUtils.setFullyReadColumns(jobConf);
        }
        ColumnProjectionUtils.appendReadColumnNames(jobConf,
            tableScan.getNeededColumns());

        // push down filters
        pushFilters(jobConf, tableScan);
      }
    }
  }
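On the read side, record readers recover the pruned column list from the same configuration. A minimal sketch, assuming this version of ColumnProjectionUtils exposes getReadColumnIDs:

      // Sketch: recover the projected column IDs that were appended above;
      // columns outside this list can be skipped entirely when deserializing.
      List<Integer> readColumnIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);
      LOG.info("Projected column ids: " + readColumnIDs);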

Examples of org.apache.hadoop.hive.ql.exec.TableScanOperator

        // For each root operator of the query, collect the names of the table
        // columns the scan actually needs (e.g. for authorization checks).
        for (Map.Entry<String, Operator<? extends OperatorDesc>> topOpMap : querySem
            .getParseContext().getTopOps().entrySet()) {
          Operator<? extends OperatorDesc> topOp = topOpMap.getValue();
          if (topOp instanceof TableScanOperator
              && tsoTopMap.containsKey(topOp)) {
            TableScanOperator tableScanOp = (TableScanOperator) topOp;
            Table tbl = tsoTopMap.get(tableScanOp);
            List<Integer> neededColumnIds = tableScanOp.getNeededColumnIDs();
            List<FieldSchema> columns = tbl.getCols();
            List<String> cols = new ArrayList<String>();
            if (neededColumnIds != null && neededColumnIds.size() > 0) {
              for (int i = 0; i < neededColumnIds.size(); i++) {
                cols.add(columns.get(neededColumnIds.get(i)).getName());
              }
            }

Examples of org.apache.hadoop.hive.ql.exec.TableScanOperator

      // (earlier sampling branches omitted from this excerpt)
    } else if (mWork.getSamplingType() == MapWork.SAMPLING_ON_START) {
      console.printInfo("Creating sampling data..");
      assert topOp instanceof TableScanOperator;
      TableScanOperator ts = (TableScanOperator) topOp;

      // Build a FetchWork over the input paths so rows can be pulled directly,
      // without launching a separate job.
      FetchWork fetchWork;
      if (!partDesc.isPartitioned()) {
        assert paths.size() == 1;
        fetchWork = new FetchWork(paths.get(0), partDesc.getTableDesc());
      } else {
        fetchWork = new FetchWork(paths, parts, partDesc.getTableDesc());
      }
      fetchWork.setSource(ts);

      // random sampling: push fetched rows through the TableScanOperator,
      // collecting its output with the sampler
      FetchOperator fetcher = PartitionKeySampler.createSampler(fetchWork, conf, job, ts);
      try {
        ts.initialize(conf, new ObjectInspector[]{fetcher.getOutputObjectInspector()});
        ts.setOutputCollector(sampler);
        while (fetcher.pushRow()) { }
      } finally {
        fetcher.clearFetchContext();
      }
    } else {

Examples of org.apache.hadoop.hive.ql.exec.TableScanOperator

      // Only prune inputs when the sample descriptor explicitly allows it.
      if ((sampleDescr == null) || !sampleDescr.getInputPruning()) {
        return null;
      }

      // The matched rule guarantees a three-operator stack with the
      // TableScanOperator at the bottom; record its sample descriptor.
      assert stack.size() == 3;
      TableScanOperator tsOp = (TableScanOperator) stack.get(0);
      ((SamplePrunerCtx) procCtx).getOpToSamplePruner().put(tsOp, sampleDescr);
      return null;
    }


Examples of org.apache.hadoop.hive.ql.exec.TableScanOperator

  /**
   * The Node Processor for Column Pruning on Table Scan Operators. It will
   * store needed columns in tableScanDesc.
   */
  public static class ColumnPrunerTableScanProc implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
        Object... nodeOutputs) throws SemanticException {
      TableScanOperator scanOp = (TableScanOperator) nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      List<String> cols = cppCtx
          .genColLists((Operator<? extends Serializable>) nd);
      cppCtx.getPrunedColLists().put((Operator<? extends Serializable>) nd,
          cols);
      ArrayList<Integer> needed_columns = new ArrayList<Integer>();
      RowResolver inputRR = cppCtx.getOpToParseCtxMap().get(scanOp).getRowResolver();
      TableScanDesc desc = scanOp.getConf();
      List<VirtualColumn> virtualCols = desc.getVirtualCols();
      List<VirtualColumn> newVirtualCols = new ArrayList<VirtualColumn>();

      // add virtual columns for ANALYZE TABLE
      if (scanOp.getConf().isGatherStats()) {
        cols.add(VirtualColumn.RAWDATASIZE.getName());
      }

      for (int i = 0; i < cols.size(); i++) {
        String[] tabCol = inputRR.reverseLookup(cols.get(i));
        if (tabCol == null) {
          continue;
        }
        ColumnInfo colInfo = inputRR.get(tabCol[0], tabCol[1]);
        if (colInfo.getIsVirtualCol()) {
          // "part" is also a virtual column, but partition columns should not
          // be in this list.
          for (int j = 0; j < virtualCols.size(); j++) {
            VirtualColumn vc = virtualCols.get(j);
            if (vc.getName().equals(colInfo.getInternalName())) {
              newVirtualCols.add(vc);
            }
          }
          // No need to pass virtual columns to the reader.
          continue;
        }
        int position = inputRR.getPosition(cols.get(i));
        if (position >= 0) {
          needed_columns.add(position);
        }
      }

      desc.setVirtualCols(newVirtualCols);
      scanOp.setNeededColumnIDs(needed_columns);
      return null;
    }
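For context, node processors like this one are registered with Hive's rule dispatcher and driven by a graph walker. A minimal sketch of that wiring, assuming the org.apache.hadoop.hive.ql.lib classes named below (DefaultRuleDispatcher, DefaultGraphWalker, RuleRegExp) and a ColumnPrunerProcCtx constructor that takes the op-to-parse-context map; the real optimizer wiring differs in detail:

      // Sketch: fire ColumnPrunerTableScanProc on every TableScanOperator
      // ("TS%" matches the operator's rule token) while walking the DAG.
      Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
      opRules.put(new RuleRegExp("R1", "TS%"), new ColumnPrunerTableScanProc());

      // Assumed constructor: the ctx carries the op-to-parse-context map the
      // processor reads above.
      ColumnPrunerProcCtx cppCtx = new ColumnPrunerProcCtx(pctx.getOpParseCtx());
      Dispatcher disp = new DefaultRuleDispatcher(null, opRules, cppCtx);
      GraphWalker walker = new DefaultGraphWalker(disp);

      // Start from the query's root operators and walk the whole tree.
      ArrayList<Node> topNodes = new ArrayList<Node>(pctx.getTopOps().values());
      walker.startWalking(topNodes, null);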