Package org.apache.hadoop.hive.ql.plan

Examples of org.apache.hadoop.hive.ql.plan.MapWork


    // Build a TezWork DAG containing two map vertices and two reduce vertices.
    work = new TezWork("");

    mws = new MapWork[] { new MapWork(), new MapWork() };
    rws = new ReduceWork[] { new ReduceWork(), new ReduceWork() };

    work.addAll(mws);
    work.addAll(rws);
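In the full test, the DAG is then wired together with edges. A minimal sketch of that step, assuming the TezWork.connect and TezEdgeProperty APIs from the Hive code base (the edge type chosen here is illustrative):

    // Connect each map vertex to a reduce vertex with a simple edge.
    TezEdgeProperty edgeProp = new TezEdgeProperty(EdgeType.SIMPLE_EDGE);
    work.connect(mws[0], rws[0], edgeProp);
    work.connect(mws[1], rws[1], edgeProp);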


    Driver driver = new Driver(queryConf);
    driver.compile(qlCommand.toString(), false);

    if (pctx.getConf().getBoolVar(ConfVars.HIVE_INDEX_COMPACT_BINARY_SEARCH) && useSorted) {
      // For now, only works if the predicate is a single condition
      MapWork work = null;
      String originalInputFormat = null;
      for (Task task : driver.getPlan().getRootTasks()) {
        // The index query should have one and only one map reduce task in the root tasks
        // Otherwise something is wrong, log the problem and continue using the default format
        if (task.getWork() instanceof MapredWork) {
          if (work != null) {
            LOG.error("Tried to use a binary search on a compact index but there were an " +
                      "unexpected number (>1) of root level map reduce tasks in the " +
                      "reentrant query plan.");
            work.setInputformat(null);
            work.setInputFormatSorted(false);
            break;
          }
          // The instanceof check above guarantees task.getWork() is non-null.
          work = ((MapredWork) task.getWork()).getMapWork();
          String inputFormat = work.getInputformat();
          originalInputFormat = inputFormat;
          if (inputFormat == null) {
            inputFormat = HiveConf.getVar(pctx.getConf(), HiveConf.ConfVars.HIVEINPUTFORMAT);
          }

          // A binary search is only possible with HiveInputFormat or one of its
          // subclasses (e.g. CombineHiveInputFormat, BucketizedHiveInputFormat)
          try {
            if (!HiveInputFormat.class.isAssignableFrom(Class.forName(inputFormat))) {
              work = null;
              break;
            }
          } catch (ClassNotFoundException e) {
            LOG.error("Map reduce work's input format class: " + inputFormat + " was not found. " +
                       "Cannot use the fact the compact index is sorted.");
            work = null;
            break;
          }

          work.setInputFormatSorted(true);
        }
      }

      if (work != null) {
        // Find the filter operator and expr node which act on the index column and mark them
        if (!findIndexColumnFilter(work.getAliasToWork().values())) {
          LOG.error("Could not locate the index column's filter operator and expr node. Cannot " +
                    "use the fact the compact index is sorted.");
          work.setInputformat(originalInputFormat);
          work.setInputFormatSorted(false);
        }
      }
    }

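The snippet above steers binary search through two MapWork properties: the input format class name and the sorted-input flag. A minimal round-trip sketch of those two setters (variable names are illustrative):

    // Force HiveInputFormat and mark the input as sorted so the binary
    // search path can be taken; set both back to undo the decision.
    MapWork w = new MapWork();
    w.setInputformat(HiveInputFormat.class.getName());
    w.setInputFormatSorted(true);
    // ... to back out later:
    w.setInputformat(null);
    w.setInputFormatSorted(false);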

  /**
   * Retrieve the Avro schema for the partition that contains the given
   * input split.
   * @throws AvroSerdeException
   */
  private Schema getSchema(JobConf job, FileSplit split) throws AvroSerdeException, IOException {
    // Inside of a MR job, we can pull out the actual properties
    if (AvroSerdeUtils.insideMRJob(job)) {
      MapWork mapWork = Utilities.getMapWork(job);

      // Iterate over the Path -> Partition descriptions to find the partition
      // that matches our input split.
      for (Map.Entry<String,PartitionDesc> pathsAndParts: mapWork.getPathToPartitionInfo().entrySet()){
        String partitionPath = pathsAndParts.getKey();
        if (pathIsInPartition(split.getPath(), partitionPath)) {
          if (LOG.isInfoEnabled()) {
            LOG.info("Matching partition " + partitionPath +
                " with input split " + split);
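The pathIsInPartition helper is not part of this excerpt. A minimal sketch, under the assumption that a split belongs to a partition when the partition directory is a prefix of the split's path (Hive's real helper additionally normalizes scheme-less URIs):

    // Hypothetical sketch: prefix match between the split's path and the
    // partition directory recorded in pathToPartitionInfo.
    private boolean pathIsInPartition(Path splitPath, String partitionPath) {
      return splitPath.toString().startsWith(partitionPath);
    }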


  public static Map<String, Map<Integer, String>> getMapWorkAllScratchColumnVectorTypeMaps(Configuration hiveConf) {
    MapWork mapWork = getMapWork(hiveConf);
    return mapWork.getAllScratchColumnVectorTypeMaps();
  }
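A hedged usage sketch: inside a running task the Configuration already carries the serialized plan, so the lookup needs nothing beyond the job conf (the jobConf variable is assumed to be in scope):

    // Scratch-column vector types planned by the vectorizer, keyed by
    // alias and then by scratch column index.
    Map<String, Map<Integer, String>> scratchTypes =
        getMapWorkAllScratchColumnVectorTypeMaps(jobConf);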


  public MapWork createMapWork(GenTezProcContext context, Operator<?> root,
      TezWork tezWork, PrunedPartitionList partitions) throws SemanticException {
    assert root.getParentOperators().isEmpty();
    MapWork mapWork = new MapWork("Map "+ (++sequenceNumber));
    LOG.debug("Adding map work (" + mapWork.getName() + ") for " + root);

    // map work starts with table scan operators
    assert root instanceof TableScanOperator;
    TableScanOperator ts = (TableScanOperator) root;

    String alias = ts.getConf().getAlias();

    setupMapWork(mapWork, context, partitions, root, alias);

    if (context.parseContext != null
        && context.parseContext.getTopToTable() != null
        && context.parseContext.getTopToTable().containsKey(ts)
        && context.parseContext.getTopToTable().get(ts).isDummyTable()) {
      mapWork.setDummyTableScan(true);
    }

    // add new item to the tez work
    tezWork.add(mapWork);


    DynamicPruningEventDesc eventDesc = (DynamicPruningEventDesc)event.getConf();
    TableScanOperator ts = eventDesc.getTableScan();

    MapWork work = (MapWork) procCtx.rootToWorkMap.get(ts);
    if (work == null) {
      throw new AssertionError("No work found for tablescan " + ts);
    }

    BaseWork enclosingWork = getEnclosingWork(event, procCtx);
    if (enclosingWork == null) {
      throw new AssertionError("Cannot find work for operator" + event);
    }
    String sourceName = enclosingWork.getName();

    // store the vertex name in the operator pipeline
    eventDesc.setVertexName(work.getName());
    eventDesc.setInputName(work.getAliases().get(0));

    // store table descriptor in map-work
    if (!work.getEventSourceTableDescMap().containsKey(sourceName)) {
      work.getEventSourceTableDescMap().put(sourceName, new LinkedList<TableDesc>());
    }
    List<TableDesc> tables = work.getEventSourceTableDescMap().get(sourceName);
    tables.add(event.getConf().getTable());

    // store column name in map-work
    if (!work.getEventSourceColumnNameMap().containsKey(sourceName)) {
      work.getEventSourceColumnNameMap().put(sourceName, new LinkedList<String>());
    }
    List<String> columns = work.getEventSourceColumnNameMap().get(sourceName);
    columns.add(eventDesc.getTargetColumnName());

    // store partition key expr in map-work
    if (!work.getEventSourcePartKeyExprMap().containsKey(sourceName)) {
      work.getEventSourcePartKeyExprMap().put(sourceName, new LinkedList<ExprNodeDesc>());
    }
    List<ExprNodeDesc> keys = work.getEventSourcePartKeyExprMap().get(sourceName);
    keys.add(eventDesc.getPartKey());

  }
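The three blocks above repeat the same get-or-create pattern on the event-source maps. A small generic helper of the kind that could be factored out (hypothetical; not part of the Hive code shown here):

    // Hypothetical helper: append a value to the list stored under key,
    // creating the list on first use.
    private static <T> void addToSourceMap(Map<String, List<T>> map, String key, T value) {
      List<T> values = map.get(key);
      if (values == null) {
        values = new LinkedList<T>();
        map.put(key, values);
      }
      values.add(value);
    }

With it, each block collapses to a single call, e.g. addToSourceMap(work.getEventSourceTableDescMap(), sourceName, event.getConf().getTable()).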

    // Read all credentials into the credentials instance stored in JobConf.
    JobConf jobConf = new JobConf(conf);
    ShimLoader.getHadoopShims().getMergedCredentials(jobConf);

    MapWork work = Utilities.getMapWork(jobConf);

    // perform dynamic partition pruning
    pruner.prune(work, jobConf, context);

    InputSplitInfoMem inputSplitInfo = null;
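Conceptually, the prune call drops entries from the MapWork's path-to-partition map whose value for the pruned partition key was not produced at runtime by the source vertex. A hypothetical sketch of that idea (the method name and signature are assumed, not the actual DynamicPartitionPruner API):

    // Hypothetical sketch: keep only partitions whose value for the
    // pruned key appears in the set delivered by the source vertex.
    static void pruneSketch(MapWork work, String partKey, Set<String> allowedValues) {
      Iterator<Map.Entry<String, PartitionDesc>> it =
          work.getPathToPartitionInfo().entrySet().iterator();
      while (it.hasNext()) {
        String value = it.next().getValue().getPartSpec().get(partKey);
        if (value != null && !allowedValues.contains(value)) {
          it.remove();
        }
      }
    }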

  public Multimap<Integer, InputSplit> generateGroupedSplits(JobConf jobConf,
      Configuration conf, InputSplit[] splits, float waves, int availableSlots, String inputName,
      boolean groupAcrossFiles) throws Exception {

    MapWork work = populateMapWork(jobConf, inputName);
    Multimap<Integer, InputSplit> bucketSplitMultiMap =
        ArrayListMultimap.<Integer, InputSplit> create();

    int i = 0;
    InputSplit prevSplit = null;
    // ... (the split-grouping loop is elided in this excerpt) ...

    return groupedSplits;
  }

  private MapWork populateMapWork(JobConf jobConf, String inputName) {
    MapWork work = null;
    if (inputName != null) {
      work = (MapWork) Utilities.getMergeWork(jobConf, inputName);
      // work can still be null if there is no merge work for this input
    }
    if (work == null) {
      // No merge work for this input: fall back to the regular map work
      // attached to the job conf.
      work = Utilities.getMapWork(jobConf);
    }
    return work;
  }
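A hedged usage sketch of the two paths through populateMapWork (inputName as used above; a null name falls back to the plan's regular map work):

    // Merge-file vertex: resolve its dedicated merge work by input name.
    MapWork mergeWork = populateMapWork(jobConf, inputName);
    // Ordinary map vertex: no input name, use the plan's map work.
    MapWork mapWork = populateMapWork(jobConf, null);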

        Table source = queryBlock.getMetaData().getTableForAlias(alias);
        List<String> partCols = GenMapRedUtils.getPartitionColumns(parseInfo);
        partitions = new PrunedPartitionList(source, confirmedPartns, partCols, false);
      }

      MapWork w = utils.createMapWork(context, tableScan, tezWork, partitions);
      w.setGatheringStats(true);

      return true;
    }
  }
