Examples of org.apache.hadoop.mapred.RecordReader

org.apache.hadoop.mapred.RecordReader
RecordReader reads <key, value> pairs from an {@link InputSplit}.
RecordReader, typically, converts the byte-oriented view of the input, provided by the InputSplit, and presents a record-oriented view for the {@link Mapper} & {@link Reducer} tasks for processing. It thus assumes the responsibility of processing record boundaries and presenting the tasks with keys and values.
See also: {@link InputSplit}, {@link InputFormat}.

        //LOG.info("splitting: got =        " + splits.length);


        // check each split
        BitSet bits = new BitSet(length);
        for (int j = 0; j < splits.length; j++) {
          RecordReader reader =
            format.getRecordReader(splits[j], job, reporter);
          try {
            int count = 0;
            while (reader.next(key, value)) {
              // if (bits.get(key.get())) {
              // LOG.info("splits["+j+"]="+splits[j]+" : " + key.get());
              // LOG.info("@"+reader.getPos());
              // }
              assertFalse("Key in multiple partitions.", bits.get(key.getData()));
              bits.set(key.getData());
              count++;
            }
            //LOG.info("splits["+j+"]="+splits[j]+" count=" + count);
          } finally {
            reader.close();
          }
        }
        assertEquals("Some keys in no partition.", length, bits.cardinality());
      }

View Full Code Here

    InputSplit targetSplit = ((SymlinkTextInputSplit)split).getTargetSplit();


    // The target data is in TextInputFormat.
    TextInputFormat inputFormat = new TextInputFormat();
    inputFormat.configure(job);
    RecordReader innerReader = null;
    try {
      innerReader = inputFormat.getRecordReader(targetSplit, job,
          reporter);
    } catch (Exception e) {
      innerReader = HiveIOExceptionHandlerUtil

View Full Code Here

    pushProjectionsAndFilters(cloneJobConf, inputFormatClass, hsplit.getPath()
      .toString(), hsplit.getPath().toUri().getPath(), nonNative);


    InputFormat inputFormat = getInputFormatFromCache(inputFormatClass,
        cloneJobConf);
    RecordReader innerReader = null;
    try {
      innerReader = inputFormat.getRecordReader(inputSplit,
        cloneJobConf, reporter);
    } catch (Exception e) {
      innerReader = HiveIOExceptionHandlerUtil

View Full Code Here

    
    InputSplit inputSplit = input.getOldInputSplit();
    
    updateJobWithSplit(job, inputSplit);


    RecordReader in = new OldRecordReader(input);


    OutputCollector collector = new OldOutputCollector(output);


    MapRunnable runner =
        (MapRunnable)ReflectionUtils.newInstance(job.getMapRunnerClass(), job);

View Full Code Here

        //LOG.info("splitting: got =        " + splits.length);


        // check each split
        BitSet bits = new BitSet(length);
        for (int j = 0; j < splits.length; j++) {
          RecordReader reader =
            format.getRecordReader(fs, splits[j], job, reporter);
          try {
            int count = 0;
            while (reader.next(key, value)) {
              // if (bits.get(key.get())) {
              // LOG.info("splits["+j+"]="+splits[j]+" : " + key.get());
              // LOG.info("@"+reader.getPos());
              // }
              assertFalse("Key in multiple partitions.", bits.get(key.getData()));
              bits.set(key.getData());
              count++;
            }
            //LOG.info("splits["+j+"]="+splits[j]+" count=" + count);
          } finally {
            reader.close();
          }
        }
        assertEquals("Some keys in no partition.", length, bits.cardinality());
      }

View Full Code Here

        InputSplit[] splits = format.getSplits(job, numSplits);


        // check each split
        BitSet bits = new BitSet(length);
        for (int j = 0; j < splits.length; j++) {
          RecordReader reader =
            format.getRecordReader(splits[j], job, reporter);
          try {
            int count = 0;
            while (reader.next(key, value)) {
              assertFalse("Key in multiple partitions.", bits.get(key.getData()));
              bits.set(key.getData());
              count++;
            }
          } finally {
            reader.close();
          }
        }
        assertEquals("Some keys in no partition.", length, bits.cardinality());
      }

View Full Code Here

    
    InputSplit inputSplit = input.getOldInputSplit();
    
    updateJobWithSplit(job, inputSplit);


    RecordReader in = new OldRecordReader(input);


    OutputCollector collector = new OldOutputCollector(output);


    MapRunnable runner =
        (MapRunnable)ReflectionUtils.newInstance(job.getMapRunnerClass(), job);

View Full Code Here

    AutoInputFormat autoInputFormat = new AutoInputFormat();
    for (FileStatus fileStatus : files) {
      FileSplit split = new FileSplit(fileStatus.getPath(), 0,
        fileStatus.getLen() * fileStatus.getBlockSize(),
        (String[]) null);
      RecordReader recReader = null;
      try {
        recReader = autoInputFormat.getRecordReader(split, job, Reporter.NULL);
        Object key = recReader.createKey();
        Object value = recReader.createValue();
        while (recReader.next(key, value)) {
          if (key instanceof Writable) {
            TypedBytesWritableOutput.get(dout).write((Writable) key);
          } else {
            TypedBytesOutput.get(dout).write(key);
          }
          if (value instanceof Writable) {
            TypedBytesWritableOutput.get(dout).write((Writable) value);
          } else {
            TypedBytesOutput.get(dout).write(value);
          }
        }
      } finally {
        if (recReader != null) {
          recReader.close();
        }
      }
    }
    dout.flush();
    return 0;

View Full Code Here

    // Initialize input in-line since it sets parameters which may be used by the processor.
    // Done only for MRInput.
    // TODO use new method in MRInput to get required info
    //input.initialize(job, master);


    RecordReader in = new OldRecordReader(input);


    OutputCollector collector = new OldOutputCollector(output);


    MapRunnable runner =
        (MapRunnable)ReflectionUtils.newInstance(job.getMapRunnerClass(), job);

View Full Code Here

    pushProjectionsAndFilters(cloneJobConf, inputFormatClass, hsplit.getPath()
      .toString(), hsplit.getPath().toUri().getPath(), nonNative);


    InputFormat inputFormat = getInputFormatFromCache(inputFormatClass,
        cloneJobConf);
    RecordReader innerReader = null;
    try {
      innerReader = inputFormat.getRecordReader(inputSplit,
        cloneJobConf, reporter);
    } catch (Exception e) {
      innerReader = HiveIOExceptionHandlerUtil

View Full Code Here

0 1 2 3 4

TOP

Related Classes of org.apache.hadoop.mapred.RecordReader

cascading.flow.tez.stream.element.TezSourceStage

org.apache.hadoop.hive.ql.io.HiveInputFormat

org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat

org.apache.hadoop.hive.ql.io.TestRCFile

org.apache.hadoop.record.test.TestWritable

org.apache.hadoop.record.TestRecordWritable

org.apache.hadoop.streaming.AutoInputFormat

org.apache.hadoop.streaming.DumpTypedBytes

org.apache.hadoop.streaming.TestAutoInputFormat

org.apache.tez.mapreduce.processor.map.MapProcessor

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.