Examples of RecordReader


Examples of org.apache.hadoop.mapred.RecordReader

   
    // Obtain the split assigned to this task via the old (mapred) API.
    InputSplit inputSplit = input.getOldInputSplit();

    updateJobWithSplit(job, inputSplit);

    // Adapt the task's input and output to the old-API RecordReader and
    // OutputCollector interfaces.
    RecordReader in = new OldRecordReader(input);

    OutputCollector collector = new OldOutputCollector(output);

    // Instantiate the job's configured MapRunnable; it will drive the
    // reader and feed map outputs to the collector.
    MapRunnable runner =
        (MapRunnable)ReflectionUtils.newInstance(job.getMapRunnerClass(), job);
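The old org.apache.hadoop.mapred.RecordReader is a pull-style interface: the caller allocates reusable key/value objects with createKey()/createValue() and advances with next() until the split is exhausted. A minimal sketch of the read loop a MapRunnable drives over a reader like the one obtained above; the countRecords helper is hypothetical:

    import java.io.IOException;
    import org.apache.hadoop.mapred.RecordReader;

    public class RecordReaderDrain {
      // Drain an old-API reader, reusing one key/value pair per the contract.
      public static <K, V> long countRecords(RecordReader<K, V> reader)
          throws IOException {
        K key = reader.createKey();      // reader allocates the reusable key
        V value = reader.createValue();  // ...and the reusable value
        long count = 0;
        try {
          while (reader.next(key, value)) {  // false once the split is done
            count++;
          }
        } finally {
          reader.close();
        }
        return count;
      }
    }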

Examples of org.apache.hadoop.mapred.RecordReader

        //LOG.info("splitting: got =        " + splits.length);

        // check each split
        BitSet bits = new BitSet(length);
        for (int j = 0; j < splits.length; j++) {
          RecordReader reader =
            format.getRecordReader(fs, splits[j], job, reporter);
          try {
            int count = 0;
            while (reader.next(key, value)) {
              // if (bits.get(key.get())) {
              // LOG.info("splits["+j+"]="+splits[j]+" : " + key.get());
              // LOG.info("@"+reader.getPos());
              // }
              assertFalse("Key in multiple partitions.", bits.get(key.getData()));
              bits.set(key.getData());
              count++;
            }
            //LOG.info("splits["+j+"]="+splits[j]+" count=" + count);
          } finally {
            reader.close();
          }
        }
        assertEquals("Some keys in no partition.", length, bits.cardinality());
      }
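In the stable old (mapred) API, the splits array a test like this iterates over comes from InputFormat.getSplits(JobConf, int); the four-argument getRecordReader above suggests an older or custom variant that also receives the FileSystem explicitly. A minimal sketch of producing the splits under that assumption, with SequenceFileInputFormat and the key/value types as illustrative choices:

    import java.io.IOException;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.BytesWritable;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.mapred.FileInputFormat;
    import org.apache.hadoop.mapred.InputSplit;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.SequenceFileInputFormat;

    public class SplitSketch {
      public static InputSplit[] splitsFor(Path input, int numSplits)
          throws IOException {
        JobConf job = new JobConf();
        FileInputFormat.setInputPaths(job, input);  // register input file(s)
        SequenceFileInputFormat<IntWritable, BytesWritable> format =
            new SequenceFileInputFormat<IntWritable, BytesWritable>();
        // numSplits is a hint; the format may return more or fewer splits.
        return format.getSplits(job, numSplits);
      }
    }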

Examples of org.apache.hadoop.mapreduce.RecordReader

    Path [] files = { new Path("file1") };
    long [] lengths = { 1 };

    CombineFileSplit split = new CombineFileSplit(files, lengths);

    RecordReader rr = inputFormat.createRecordReader(split, context1);
    assertTrue("Unexpected RR type!", rr instanceof CombineFileRecordReader);

    // Verify that the initial configuration is the one being used.
    // Right after construction the dummy key should have value "STATE1"
    assertEquals("Invalid initial dummy key value", "STATE1",
      rr.getCurrentKey().toString());

    // Switch the active context for the RecordReader...
    Configuration conf2 = new Configuration();
    conf2.set(DUMMY_KEY, "STATE2");
    TaskAttemptContext context2 = new TaskAttemptContext(conf2, taskId);
    rr.initialize(split, context2);

    // And verify that the new context is updated into the child record reader.
    assertEquals("Invalid secondary dummy key value", "STATE2",
      rr.getCurrentKey().toString());
  }
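CombineFileRecordReader delegates each sub-split to a child reader class whose constructor must have the exact signature (CombineFileSplit, TaskAttemptContext, Integer). A hedged sketch of the kind of dummy child reader a test like this relies on; the class name and the "test.dummy.key" configuration key stand in for the test's actual DUMMY_KEY and are assumptions, not the test source:

    import java.io.IOException;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.InputSplit;
    import org.apache.hadoop.mapreduce.RecordReader;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;
    import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

    public class DummyChildRecordReader extends RecordReader<Text, Text> {
      private final Text key = new Text();
      private final Text value = new Text();
      private boolean emitted = false;

      // CombineFileRecordReader instantiates children reflectively and
      // requires exactly this (split, context, sub-split index) constructor.
      public DummyChildRecordReader(CombineFileSplit split,
          TaskAttemptContext context, Integer index) {
        key.set(context.getConfiguration().get("test.dummy.key", "STATE1"));
        value.set(split.getPath(index).getName());
      }

      @Override
      public void initialize(InputSplit split, TaskAttemptContext context) {
        // Re-read the configuration so switching contexts (STATE1 -> STATE2
        // in the test above) is observable through getCurrentKey().
        key.set(context.getConfiguration().get("test.dummy.key", "STATE1"));
      }

      @Override
      public boolean nextKeyValue() {
        if (emitted) return false;  // each child emits one (k, v) pair
        emitted = true;
        return true;
      }

      @Override public Text getCurrentKey() { return key; }
      @Override public Text getCurrentValue() { return value; }
      @Override public float getProgress() { return emitted ? 1.0f : 0.0f; }
      @Override public void close() throws IOException { }
    }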

Examples of org.apache.hadoop.mapreduce.RecordReader

    Path [] files = { new Path("file1"), new Path("file2") };
    long [] lengths = { 1, 1 };

    CombineFileSplit split = new CombineFileSplit(files, lengths);
    RecordReader rr = inputFormat.createRecordReader(split, context);
    assertTrue("Unexpected RR type!", rr instanceof CombineFileRecordReader);

    // first initialize() call comes from MapTask. We'll do it here.
    rr.initialize(split, context);

    // First value is first filename.
    assertTrue(rr.nextKeyValue());
    assertEquals("file1", rr.getCurrentValue().toString());

    // The inner RR will return false, because it only emits one (k, v) pair.
    // But there's another sub-split to process. This returns true to us.
    assertTrue(rr.nextKeyValue());
   
    // And the 2nd rr will have its initialize method called correctly.
    assertEquals("file2", rr.getCurrentValue().toString());
   
    // But after both child RR's have returned their singleton (k, v), this
    // should also return false.
    assertFalse(rr.nextKeyValue());
  }

Examples of org.apache.hadoop.mapreduce.RecordReader

    //Get the input format for the storage driver
    InputFormat inputFormat =
      storageDriver.getInputFormat(partitionInfo.getInputStorageDriverProperties());

    //Create the underlying input format's record reader and a Howl wrapper
    RecordReader recordReader =
      inputFormat.createRecordReader(howlSplit.getBaseSplit(), taskContext);

    return new HowlRecordReader(storageDriver, recordReader);
  }
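HowlRecordReader follows a common delegation pattern, which also recurs in the Pig and HCat examples below: wrap the underlying reader and forward every lifecycle call, converting records to the wrapper's own model where needed. A minimal sketch of the pattern, with WrapperRecordReader as an illustrative name rather than the Howl source:

    import java.io.IOException;
    import org.apache.hadoop.mapreduce.InputSplit;
    import org.apache.hadoop.mapreduce.RecordReader;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;

    public class WrapperRecordReader<K, V> extends RecordReader<K, V> {
      private final RecordReader<K, V> baseReader;

      public WrapperRecordReader(RecordReader<K, V> baseReader) {
        this.baseReader = baseReader;
      }

      @Override
      public void initialize(InputSplit split, TaskAttemptContext context)
          throws IOException, InterruptedException {
        baseReader.initialize(split, context);  // delegate lifecycle setup
      }

      @Override
      public boolean nextKeyValue() throws IOException, InterruptedException {
        // A real wrapper (Howl/HCat/Pig) would translate the underlying
        // record into its own key/value model here.
        return baseReader.nextKeyValue();
      }

      @Override
      public K getCurrentKey() throws IOException, InterruptedException {
        return baseReader.getCurrentKey();
      }

      @Override
      public V getCurrentValue() throws IOException, InterruptedException {
        return baseReader.getCurrentValue();
      }

      @Override
      public float getProgress() throws IOException, InterruptedException {
        return baseReader.getProgress();
      }

      @Override
      public void close() throws IOException {
        baseReader.close();
      }
    }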

Examples of org.apache.hadoop.mapreduce.RecordReader

        // for backward compatibility
        PigInputFormat.sJob = conf;
       
        InputFormat inputFormat = loadFunc.getInputFormat();
        // now invoke the createRecordReader() with this "adjusted" conf
        RecordReader reader = inputFormat.createRecordReader(
                pigSplit.getWrappedSplit(), context);
       
        return new PigRecordReader(reader, loadFunc, conf);
    }

Examples of org.apache.hadoop.mapreduce.RecordReader

    //Get the input format for the storage driver
    InputFormat inputFormat =
      storageDriver.getInputFormat(partitionInfo.getInputStorageDriverProperties());

    //Create the underlying input format's record reader and an HCat wrapper
    RecordReader recordReader =
      inputFormat.createRecordReader(hcatSplit.getBaseSplit(), taskContext);

    return new HCatRecordReader(storageDriver, recordReader);
  }

Examples of org.apache.hadoop.mapreduce.RecordReader

    Path [] files = { new Path("file1") };
    long [] lengths = { 1 };

    CombineFileSplit split = new CombineFileSplit(files, lengths);

    RecordReader rr = inputFormat.createRecordReader(split, context1);
    assertTrue("Unexpected RR type!", rr instanceof CombineFileRecordReader);

    // Verify that the initial configuration is the one being used.
    // Right after construction the dummy key should have value "STATE1"
    assertEquals("Invalid initial dummy key value", "STATE1",
      rr.getCurrentKey().toString());

    // Switch the active context for the RecordReader...
    Configuration conf2 = new Configuration();
    conf2.set(DUMMY_KEY, "STATE2");
    TaskAttemptContext context2 = new TaskAttemptContextImpl(conf2, taskId);
    rr.initialize(split, context2);

    // And verify that the new context is updated into the child record reader.
    assertEquals("Invalid secondary dummy key value", "STATE2",
      rr.getCurrentKey().toString());
  }

