Package org.apache.hadoop.mapred

Examples of org.apache.hadoop.mapred.RecordReader
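All of these snippets use the old org.apache.hadoop.mapred API, where a RecordReader pulls typed key/value pairs out of an InputSplit into caller-supplied, reusable objects. For reference, this is the interface every example below implements or consumes:

    public interface RecordReader<K, V> {
      boolean next(K key, V value) throws IOException; // read the next record into reusable objects
      K createKey();                                   // allocate a reusable key instance
      V createValue();                                 // allocate a reusable value instance
      long getPos() throws IOException;                // current position in the input
      void close() throws IOException;
      float getProgress() throws IOException;          // fraction consumed, 0.0 to 1.0
    }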


    InputSplit[] splits = format.getSplits(job, numSplits);

    // Check each split: every key's integer payload flips one bit, so a bit
    // that is already set means the key appeared in more than one partition.
    BitSet bits = new BitSet(length);
    for (int j = 0; j < splits.length; j++) {
      RecordReader reader =
        format.getRecordReader(splits[j], job, Reporter.NULL);
      try {
        int count = 0;
        while (reader.next(key, value)) {
          assertFalse("Key in multiple partitions.", bits.get(key.getData()));
          bits.set(key.getData());
          count++;
        }
      } finally {
        reader.close();
      }
    }
    // Every key landing in some partition means every bit is set exactly once.
    assertEquals("Some keys in no partition.", length, bits.cardinality());
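The harness around this test (format, job, key, value, length) is elided above. Below is a minimal, self-contained sketch of the same read loop using TextInputFormat; the input path and split count are illustrative, not taken from the test:

    import java.io.IOException;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.*;

    public class ReadAllRecords {
      public static void main(String[] args) throws IOException {
        JobConf job = new JobConf();
        FileInputFormat.setInputPaths(job, new Path("/tmp/input")); // illustrative path
        TextInputFormat format = new TextInputFormat();
        format.configure(job); // TextInputFormat is JobConfigurable

        for (InputSplit split : format.getSplits(job, 2)) {
          RecordReader<LongWritable, Text> reader =
              format.getRecordReader(split, job, Reporter.NULL);
          // Reuse one key/value pair; next() fills them in place.
          LongWritable key = reader.createKey();
          Text value = reader.createValue();
          try {
            while (reader.next(key, value)) {
              System.out.println(key.get() + "\t" + value);
            }
          } finally {
            reader.close();
          }
        }
      }
    }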

    pushProjectionsAndFilters(cloneJobConf, inputFormatClass, hsplit.getPath()
        .toString(), hsplit.getPath().toUri().getPath(), nonNative);

    InputFormat inputFormat = getInputFormatFromCache(inputFormatClass,
        cloneJobConf);
    RecordReader innerReader = inputFormat.getRecordReader(inputSplit,
        cloneJobConf, reporter);

    // Wrap the underlying reader so Hive can track IO context per split.
    HiveRecordReader<K, V> rr = new HiveRecordReader<K, V>(innerReader);
    rr.initIOContext(hsplit, job, inputFormatClass, innerReader);
    return rr;
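HiveRecordReader above is a wrapper that forwards every call to innerReader while maintaining Hive's IO context. A generic sketch of that forwarding pattern; ForwardingRecordReader is an illustrative name, not a Hive class:

    import java.io.IOException;
    import org.apache.hadoop.mapred.RecordReader;

    // Illustrative delegation skeleton; Hive's real HiveRecordReader adds
    // IOContext bookkeeping on top of this plain forwarding.
    class ForwardingRecordReader<K, V> implements RecordReader<K, V> {
      private final RecordReader<K, V> delegate;

      ForwardingRecordReader(RecordReader<K, V> delegate) {
        this.delegate = delegate;
      }

      public boolean next(K key, V value) throws IOException {
        // A real wrapper would update its bookkeeping here before delegating.
        return delegate.next(key, value);
      }

      public K createKey() { return delegate.createKey(); }
      public V createValue() { return delegate.createValue(); }
      public long getPos() throws IOException { return delegate.getPos(); }
      public float getProgress() throws IOException { return delegate.getProgress(); }
      public void close() throws IOException { delegate.close(); }
    }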

    InputSplit[] splits = inputFormat.getSplits(jobConf, splitNumber);
    assertEquals("splits length should be " + splitNumber, splitNumber, splits.length);
    int readCount = 0;
    for (int i = 0; i < splits.length; i++) {
      int previousReadCount = readCount;
      RecordReader rr = inputFormat.getRecordReader(splits[i], jobConf, Reporter.NULL);
      // Reuse a single key/value pair; next() fills them in place.
      Object key = rr.createKey();
      Object value = rr.createValue();
      try {
        while (rr.next(key, value)) {
          readCount++;
        }
      } finally {
        rr.close();
      }
      System.out.println("Split " + i + " read " + (readCount - previousReadCount) + " records");
    }
    assertEquals("readCount should be equal to writeCount", writeCount, readCount);
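This read-count test presumes an earlier write phase that produced writeCount records. A minimal sketch of such a phase using SequenceFile, continuing the test's context; the path, key/value types, and record contents are assumptions, not from the test:

    // Hypothetical setup: write `writeCount` records for the split test to read back.
    FileSystem fs = FileSystem.get(jobConf);
    Path file = new Path("/tmp/recordreader-test/data.seq"); // illustrative path
    SequenceFile.Writer writer =
        SequenceFile.createWriter(fs, jobConf, file, IntWritable.class, Text.class);
    try {
      for (int i = 0; i < writeCount; i++) {
        writer.append(new IntWritable(i), new Text("record-" + i));
      }
    } finally {
      writer.close();
    }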

    pushProjectionsAndFilters(cloneJobConf, inputFormatClass, hsplit.getPath()
        .toString(), hsplit.getPath().toUri().getPath(), nonNative);

    InputFormat inputFormat = getInputFormatFromCache(inputFormatClass,
        cloneJobConf);
    RecordReader innerReader = null;
    try {
      innerReader = inputFormat.getRecordReader(inputSplit,
          cloneJobConf, reporter);
    } catch (Exception e) {
      // Creation failures are routed through Hive's pluggable IO exception
      // handlers, which may supply a substitute reader or rethrow.
      // (The excerpt was cut off mid-call; statement completed here.)
      innerReader = HiveIOExceptionHandlerUtil
          .handleRecordReaderCreationException(e, cloneJobConf);
    }

    // Resolve this node's hostname through the DNS interface and nameserver
    // configured for the tasktracker (both default to "default").
    final String localHostname =
      DNS.getDefaultHost(job.get("mapred.tasktracker.dns.interface", "default"),
          job.get("mapred.tasktracker.dns.nameserver", "default"));

    // Anonymous reader that walks the userlogs directories one at a time,
    // delegating to a LineRecordReader per file while tracking a global position.
    return new RecordReader() {
      private String hostname = localHostname;
      private File[] userlogsDirs = tds;
      private int userlogsDirsIndex = 0;
      private LineRecordReader lrr = null;
      private long accumulatingPosition = 0;
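The excerpt breaks off before the reader's methods. One plausible continuation, purely illustrative and not the actual Hadoop source: drain a LineRecordReader per directory and fold each finished file into the accumulated position (the "syslog" file name and the java.io imports are assumptions):

      public boolean next(LongWritable key, Text value) throws IOException {
        while (true) {
          if (lrr != null) {
            if (lrr.next(key, value)) {
              key.set(accumulatingPosition + key.get()); // make the offset global
              return true;
            }
            accumulatingPosition += lrr.getPos(); // fold the finished file in
            lrr.close();
            lrr = null;
          }
          if (userlogsDirsIndex >= userlogsDirs.length) {
            return false; // every directory exhausted
          }
          File log = new File(userlogsDirs[userlogsDirsIndex++], "syslog"); // assumed name
          if (log.isFile()) {
            lrr = new LineRecordReader(
                new FileInputStream(log), 0, log.length(), Integer.MAX_VALUE);
          }
        }
      }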

    public RecordReader getRecordReader(final InputSplit split,
      final JobConf job, final Reporter reporter)
      throws IOException
    {
      // Only one record to read: the split itself carries the line.
      return new RecordReader()
      {
        private final String line = ((LineInputSplit) split).line;
        private boolean read = false;
        // ... the remaining methods are cut off in the original excerpt ...
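The anonymous class breaks off above. A complete sketch of such a one-record reader, assuming a LongWritable offset key and a Text value (the excerpt does not show its actual types):

      // Illustrative completion, not the original source: emit the split's
      // single line exactly once, then report end-of-input.
      return new RecordReader<LongWritable, Text>() {
        private final String line = ((LineInputSplit) split).line;
        private boolean read = false;

        public boolean next(LongWritable key, Text value) throws IOException {
          if (read) {
            return false; // the one record has already been returned
          }
          key.set(0);
          value.set(line);
          read = true;
          return true;
        }

        public LongWritable createKey() { return new LongWritable(); }
        public Text createValue() { return new Text(); }
        public long getPos() throws IOException { return read ? line.length() : 0; }
        public float getProgress() throws IOException { return read ? 1.0f : 0.0f; }
        public void close() throws IOException { } // nothing to release
      };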