/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
 */

package org.apache.tez.mapreduce.processor.map;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapred.MapRunnable;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.map.WrappedMapper;
import org.apache.hadoop.util.ReflectionUtils;

import org.apache.tez.dag.api.TezException;
import org.apache.tez.mapreduce.hadoop.mapreduce.MapContextImpl;
import org.apache.tez.mapreduce.input.MRInput;
import org.apache.tez.mapreduce.input.MRInputLegacy;
import org.apache.tez.mapreduce.output.MROutputLegacy;
import org.apache.tez.mapreduce.processor.MRTask;
import org.apache.tez.mapreduce.processor.MRTaskReporter;
import org.apache.tez.runtime.api.Event;
import org.apache.tez.runtime.api.LogicalInput;
import org.apache.tez.runtime.api.LogicalOutput;
import org.apache.tez.runtime.api.ProcessorContext;
import org.apache.tez.runtime.library.api.KeyValueReader;
import org.apache.tez.runtime.library.api.KeyValueWriter;
import org.apache.tez.runtime.library.output.OrderedPartitionedKVOutput;
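
/**
 * Tez processor that runs a MapReduce map task. Expects exactly one
 * {@link MRInputLegacy} input and exactly one logical output (either
 * {@link MROutputLegacy} or {@link OrderedPartitionedKVOutput}), and
 * dispatches to the old (mapred) or new (mapreduce) mapper API based on
 * the job configuration.
 *
 * <p>A rough wiring sketch (vertex name and parallelism are illustrative,
 * not prescribed by this class):
 * <pre>
 *   Vertex mapVertex = Vertex.create("map",
 *       ProcessorDescriptor.create(MapProcessor.class.getName()), numTasks);
 * </pre>
 */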
@SuppressWarnings({ "unchecked", "rawtypes" })
@Private
public class MapProcessor extends MRTask {

  private static final Log LOG = LogFactory.getLog(MapProcessor.class);

public MapProcessor(ProcessorContext processorContext) {
super(processorContext, true);
  }

@Override
public void handleEvents(List<Event> processorEvents) {
// TODO Auto-generated method stub
  }

  public void close() throws IOException {
// TODO Auto-generated method stub
  }

  @Override
public void run(Map<String, LogicalInput> inputs,
Map<String, LogicalOutput> outputs) throws Exception {
LOG.info("Running map: " + processorContext.getUniqueIdentifier());
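
    // Start all connected inputs and outputs before validating that there is
    // exactly one of each.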
for (LogicalInput input : inputs.values()) {
input.start();
}
for (LogicalOutput output : outputs.values()) {
output.start();
}
    if (inputs.size() != 1 || outputs.size() != 1) {
throw new IOException("Cannot handle multiple inputs or outputs"
+ ", inputCount=" + inputs.size()
+ ", outputCount=" + outputs.size());
}
LogicalInput in = inputs.values().iterator().next();
LogicalOutput out = outputs.values().iterator().next();
initTask(out);
// Sanity check
if (!(in instanceof MRInputLegacy)) {
throw new IOException(new TezException(
"Only MRInputLegacy supported. Input: " + in.getClass()));
}
MRInputLegacy input = (MRInputLegacy)in;
input.init();
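
    // Fold any configuration updates produced while initializing the input
    // back into the task's JobConf.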
Configuration incrementalConf = input.getConfigUpdates();
if (incrementalConf != null) {
for (Entry<String, String> entry : incrementalConf) {
jobConf.set(entry.getKey(), entry.getValue());
}
}
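
    // The single output is either a direct MR output (MROutputLegacy) or the
    // sorted, partitioned edge feeding a reduce stage
    // (OrderedPartitionedKVOutput); both expose a KeyValueWriter.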
KeyValueWriter kvWriter = null;
    if (out instanceof MROutputLegacy) {
      kvWriter = ((MROutputLegacy) out).getWriter();
    } else if (out instanceof OrderedPartitionedKVOutput) {
      kvWriter = ((OrderedPartitionedKVOutput) out).getWriter();
} else {
throw new IOException("Illegal output to map, outputClass="
+ out.getClass());
}
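
    // Dispatch based on which mapper API (mapred vs. mapreduce) the job was
    // written against.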
if (useNewApi) {
runNewMapper(jobConf, mrReporter, input, kvWriter);
} else {
runOldMapper(jobConf, mrReporter, input, kvWriter);
}
done();
  }

  /**
* Update the job with details about the file split
* @param job the job configuration to update
* @param inputSplit the file split
*/
private void updateJobWithSplit(final JobConf job, InputSplit inputSplit) {
if (inputSplit instanceof FileSplit) {
FileSplit fileSplit = (FileSplit) inputSplit;
job.set(JobContext.MAP_INPUT_FILE, fileSplit.getPath().toString());
job.setLong(JobContext.MAP_INPUT_START, fileSplit.getStart());
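      // Note: JobContext.MAP_INPUT_PATH resolves to the
      // "mapreduce.map.input.length" property in Hadoop despite its name,
      // so pairing it with getLength() is deliberate.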
job.setLong(JobContext.MAP_INPUT_PATH, fileSplit.getLength());
}
LOG.info("Processing mapred split: " + inputSplit);
}
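
  /**
   * Same as {@link #updateJobWithSplit(JobConf, InputSplit)}, but for
   * new-API (mapreduce) splits.
   */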
private void updateJobWithSplit(
final JobConf job, org.apache.hadoop.mapreduce.InputSplit inputSplit) {
if (inputSplit instanceof org.apache.hadoop.mapreduce.lib.input.FileSplit) {
org.apache.hadoop.mapreduce.lib.input.FileSplit fileSplit =
(org.apache.hadoop.mapreduce.lib.input.FileSplit) inputSplit;
job.set(JobContext.MAP_INPUT_FILE, fileSplit.getPath().toString());
job.setLong(JobContext.MAP_INPUT_START, fileSplit.getStart());
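      // As above, MAP_INPUT_PATH carries the split length despite its name.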
job.setLong(JobContext.MAP_INPUT_PATH, fileSplit.getLength());
}
LOG.info("Processing mapreduce split: " + inputSplit);
}
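
  /**
   * Runs the map phase using the old (mapred) API: wraps the Tez input and
   * output in old-API adapters and drives them through the job's configured
   * {@link MapRunnable}.
   */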
void runOldMapper(
final JobConf job,
final MRTaskReporter reporter,
final MRInputLegacy input,
final KeyValueWriter output
) throws IOException, InterruptedException {
// Initialize input in-line since it sets parameters which may be used by the processor.
// Done only for MRInput.
// TODO use new method in MRInput to get required info
//input.initialize(job, master);
InputSplit inputSplit = input.getOldInputSplit();
updateJobWithSplit(job, inputSplit);
RecordReader in = new OldRecordReader(input);
OutputCollector collector = new OldOutputCollector(output);
MapRunnable runner =
(MapRunnable)ReflectionUtils.newInstance(job.getMapRunnerClass(), job);
runner.run(in, collector, (Reporter)reporter);
    // The runner completed without throwing, so report full progress.
    reporter.setProgress(1.0f);
    this.statusUpdate();
}
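
  /**
   * Runs the map phase using the new (mapreduce) API: instantiates the job's
   * Mapper via reflection and runs it with a {@link WrappedMapper} context
   * backed by Tez input and output adapters.
   */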
private void runNewMapper(final JobConf job,
MRTaskReporter reporter,
final MRInputLegacy in,
KeyValueWriter out
) throws IOException, InterruptedException {
// Initialize input in-line since it sets parameters which may be used by the processor.
// Done only for MRInput.
// TODO use new method in MRInput to get required info
//in.initialize(job, master);
// make a task context so we can get the classes
org.apache.hadoop.mapreduce.TaskAttemptContext taskContext =
getTaskAttemptContext();
// make a mapper
org.apache.hadoop.mapreduce.Mapper mapper;
try {
mapper = (org.apache.hadoop.mapreduce.Mapper)
ReflectionUtils.newInstance(taskContext.getMapperClass(), job);
} catch (ClassNotFoundException cnfe) {
throw new IOException(cnfe);
}
org.apache.hadoop.mapreduce.RecordReader input =
new NewRecordReader(in);
org.apache.hadoop.mapreduce.RecordWriter output =
new NewOutputCollector(out);
org.apache.hadoop.mapreduce.InputSplit split = in.getNewInputSplit();
updateJobWithSplit(job, split);
    org.apache.hadoop.mapreduce.MapContext mapContext =
        new MapContextImpl(job, taskAttemptId, input, output, committer,
            processorContext, split, reporter);
org.apache.hadoop.mapreduce.Mapper.Context mapperContext =
new WrappedMapper().getMapContext(mapContext);
input.initialize(split, mapperContext);
mapper.run(mapperContext);
    // The mapper completed without throwing, so report full progress.
reporter.setProgress(1.0f);
this.statusUpdate();
input.close();
output.close(mapperContext);
}
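
  /**
   * Adapts a Tez {@link KeyValueReader} to the new-API
   * {@link org.apache.hadoop.mapreduce.RecordReader} contract.
   */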
private static class NewRecordReader extends
org.apache.hadoop.mapreduce.RecordReader {
private final MRInput in;
private KeyValueReader reader;
private NewRecordReader(MRInput in) throws IOException {
this.in = in;
this.reader = in.getReader();
}
@Override
public void initialize(org.apache.hadoop.mapreduce.InputSplit split,
TaskAttemptContext context) throws IOException,
InterruptedException {
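      // No-op: the underlying reader is initialized by MRInputLegacy itself,
      // not through this adapter.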
//in.initializeNewRecordReader(split, context);
}
@Override
public boolean nextKeyValue() throws IOException,
InterruptedException {
return reader.next();
}
@Override
public Object getCurrentKey() throws IOException,
InterruptedException {
return reader.getCurrentKey();
}
@Override
public Object getCurrentValue() throws IOException,
InterruptedException {
return reader.getCurrentValue();
}
@Override
public float getProgress() throws IOException, InterruptedException {
return in.getProgress();
}
@Override
public void close() throws IOException {
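      // No-op: the Tez input owns the underlying reader's lifecycle.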
}
}
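
  /**
   * Adapts {@link MRInputLegacy} to the old-API (mapred) {@link RecordReader}
   * contract by delegating to the underlying record reader.
   */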
private static class OldRecordReader implements RecordReader {
private final MRInputLegacy mrInput;
private OldRecordReader(MRInputLegacy mrInput) {
this.mrInput = mrInput;
}
@Override
public boolean next(Object key, Object value) throws IOException {
// TODO broken
// mrInput.setKey(key);
// mrInput.setValue(value);
// try {
// return mrInput.hasNext();
// } catch (InterruptedException ie) {
// throw new IOException(ie);
// }
return mrInput.getOldRecordReader().next(key, value);
}
@Override
public Object createKey() {
return mrInput.getOldRecordReader().createKey();
}
@Override
public Object createValue() {
return mrInput.getOldRecordReader().createValue();
}
@Override
public long getPos() throws IOException {
return mrInput.getOldRecordReader().getPos();
}
@Override
public void close() throws IOException {
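      // No-op: MRInputLegacy owns and closes the underlying reader.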
}
@Override
public float getProgress() throws IOException {
try {
return mrInput.getProgress();
} catch (InterruptedException ie) {
throw new IOException(ie);
}
}
}
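
  /**
   * Bridges the old-API {@link OutputCollector} to a Tez
   * {@link KeyValueWriter}.
   */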
private static class OldOutputCollector
implements OutputCollector {
private final KeyValueWriter output;
OldOutputCollector(KeyValueWriter output) {
this.output = output;
}
public void collect(Object key, Object value) throws IOException {
output.write(key, value);
}
}
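
  /**
   * Bridges the new-API {@link org.apache.hadoop.mapreduce.RecordWriter} to a
   * Tez {@link KeyValueWriter}.
   */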
  private static class NewOutputCollector
      extends org.apache.hadoop.mapreduce.RecordWriter {
private final KeyValueWriter out;
NewOutputCollector(KeyValueWriter out) throws IOException {
this.out = out;
}
@Override
public void write(Object key, Object value) throws IOException, InterruptedException {
out.write(key, value);
}
@Override
public void close(TaskAttemptContext context
) throws IOException, InterruptedException {
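      // No-op: the Tez output is flushed and closed by the framework.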
}
  }

  @Override
public void localizeConfiguration(JobConf jobConf)
throws IOException, InterruptedException {
super.localizeConfiguration(jobConf);
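    // Flag the localized configuration as belonging to a map task.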
jobConf.setBoolean(JobContext.TASK_ISMAP, true);
}
}