Package org.apache.hadoop.mapreduce

Examples of org.apache.hadoop.mapreduce.RecordReader
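
RecordReader is the abstraction that turns the byte-oriented view of an InputSplit into the record-oriented (key, value) stream a Mapper consumes. As orientation for the excerpts below, here is a minimal sketch of the calling contract as the framework drives it; the class and method names in this sketch are illustrative, not part of Hadoop:

    import java.io.IOException;

    import org.apache.hadoop.mapreduce.InputSplit;
    import org.apache.hadoop.mapreduce.RecordReader;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;

    public final class RecordReaderContract {
        // Drains a RecordReader the way MapTask does:
        // initialize once, iterate until nextKeyValue() is false, close.
        public static <K, V> long drain(RecordReader<K, V> reader, InputSplit split,
                TaskAttemptContext context) throws IOException, InterruptedException {
            reader.initialize(split, context);      // must precede nextKeyValue()
            long records = 0;
            try {
                while (reader.nextKeyValue()) {     // false signals end of split
                    K key = reader.getCurrentKey();     // valid only after a true nextKeyValue()
                    V value = reader.getCurrentValue();
                    records++;
                }
            } finally {
                reader.close();
            }
            return records;
        }
    }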


From a CombineFileRecordReader test: a single CombineFileSplit over two files, where each child reader emits exactly one (key, value) pair. (inputFormat and context are created in the test's setup, which this excerpt omits.)

    Path[] files = { new Path("file1"), new Path("file2") };
    long[] lengths = { 1, 1 };

    CombineFileSplit split = new CombineFileSplit(files, lengths);
    RecordReader rr = inputFormat.createRecordReader(split, context);
    assertTrue("Unexpected RR type!", rr instanceof CombineFileRecordReader);

    // The first initialize() call normally comes from MapTask; we do it here.
    rr.initialize(split, context);

    // First value is the first filename.
    assertTrue(rr.nextKeyValue());
    assertEquals("file1", rr.getCurrentValue().toString());

    // The inner RR returns false because it only emits one (k, v) pair,
    // but there is another sub-split to process, so this returns true to us.
    assertTrue(rr.nextKeyValue());

    // And the second child RR has its initialize() method called correctly.
    assertEquals("file2", rr.getCurrentValue().toString());

    // After both child RRs have returned their singleton (k, v), this
    // should also return false.
    assertFalse(rr.nextKeyValue());
  }
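
The child reader implied by this test emits exactly one record per sub-split, using the sub-split's filename as the value. A minimal hedged sketch of such a child (illustrative; the actual test's inner class differs in detail):

    // Illustrative child reader: one (position, filename) record per sub-split.
    public static class FileNameRecordReader extends RecordReader<LongWritable, Text> {
        private final Text fileName;
        private boolean emitted = false;

        // Constructor shape required of CombineFileRecordReader children.
        public FileNameRecordReader(CombineFileSplit split,
                TaskAttemptContext context, Integer index) {
            fileName = new Text(split.getPath(index).getName());
        }

        @Override
        public void initialize(InputSplit split, TaskAttemptContext context) { }

        @Override
        public boolean nextKeyValue() {
            if (emitted) {
                return false;     // the singleton record was already returned
            }
            emitted = true;
            return true;
        }

        @Override public LongWritable getCurrentKey() { return new LongWritable(0); }
        @Override public Text getCurrentValue() { return fileName; }
        @Override public float getProgress() { return emitted ? 1.0f : 0.0f; }
        @Override public void close() { }
    }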

From a TinkerPop test: reading a Gremlin Kryo file with KryoInputFormat and echoing every vertex back out through KryoOutputFormat. The excerpt opens mid-statement; the FileSplit construction is reconstructed here from context (testFile comes from the elided test setup).

        final FileSplit split = new FileSplit(new Path(testFile.getAbsolutePath()), 0,
                testFile.length(), null);
        System.out.println("reading Gremlin Kryo file " + testFile.getAbsolutePath() + " (" + testFile.length() + " bytes)");

        KryoInputFormat inputFormat = ReflectionUtils.newInstance(KryoInputFormat.class, conf);
        // Hadoop 1.x-era API; on Hadoop 2+, TaskAttemptContext is an interface
        // and TaskAttemptContextImpl is instantiated instead.
        TaskAttemptContext job = new TaskAttemptContext(conf, new TaskAttemptID());
        RecordReader reader = inputFormat.createRecordReader(split, job);

        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        try (DataOutputStream dos = new DataOutputStream(bos)) {
            KryoOutputFormat outputFormat = new KryoOutputFormat();
            RecordWriter writer = outputFormat.getRecordWriter(job, dos);

            float lastProgress = -1f;
            int count = 0;
            boolean foundKeyValue = false;
            while (reader.nextKeyValue()) {
                count++;
                // Progress must be monotonically non-decreasing.
                float progress = reader.getProgress();
                assertTrue(progress >= lastProgress);
                lastProgress = progress;
                assertEquals(NullWritable.class, reader.getCurrentKey().getClass());
                GiraphComputeVertex v = (GiraphComputeVertex) reader.getCurrentValue();
                writer.write(NullWritable.get(), v);

                Vertex vertex = v.getBaseVertex();
                assertEquals(Integer.class, vertex.id().getClass());
                // ... remainder of the loop and assertions elided in this excerpt
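
When adapting this pattern, note that the RecordWriter buffers: the bytes in bos are complete only after the writer is closed. A hypothetical continuation (the real test's remainder is elided above):

            // After the read loop: close the writer to flush, then inspect the bytes.
            writer.close(job);
            assertTrue(bos.size() > 0);   // the round-trip actually wrote something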

The same round-trip with the GraphSON adjacency-list format; only the format classes change. As above, the opening FileSplit construction is reconstructed from context.

        final FileSplit split = new FileSplit(new Path(testFile.getAbsolutePath()), 0,
                testFile.length(), null);
        System.out.println("reading GraphSON adjacency file " + testFile.getAbsolutePath() + " (" + testFile.length() + " bytes)");

        GraphSONInputFormat inputFormat = ReflectionUtils.newInstance(GraphSONInputFormat.class, conf);
        TaskAttemptContext job = new TaskAttemptContext(conf, new TaskAttemptID());
        RecordReader reader = inputFormat.createRecordReader(split, job);

        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        try (DataOutputStream dos = new DataOutputStream(bos)) {
            GraphSONOutputFormat outputFormat = new GraphSONOutputFormat();
            RecordWriter writer = outputFormat.getRecordWriter(job, dos);

            float lastProgress = -1f;
            int count = 0;
            boolean foundKeyValue = false;
            while (reader.nextKeyValue()) {
                count++;
                // Progress must be monotonically non-decreasing.
                float progress = reader.getProgress();
                assertTrue(progress >= lastProgress);
                lastProgress = progress;
                assertEquals(NullWritable.class, reader.getCurrentKey().getClass());
                GiraphComputeVertex v = (GiraphComputeVertex) reader.getCurrentValue();
                writer.write(NullWritable.get(), v);

                Vertex vertex = v.getBaseVertex();
                assertEquals(Integer.class, vertex.id().getClass());
                // ... remainder of the loop and assertions elided in this excerpt
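
The two excerpts above differ only in their format classes, so the read-assert-write loop generalizes. A hypothetical helper capturing the shared shape (not part of the original tests):

    // Hypothetical helper: read every (NullWritable, vertex) pair from a
    // RecordReader and echo it through a RecordWriter, checking progress.
    // Assumes the reader has already been created and is ready to use.
    private static <V> int roundTrip(final RecordReader<NullWritable, V> reader,
                                     final RecordWriter<NullWritable, V> writer)
            throws IOException, InterruptedException {
        int count = 0;
        float lastProgress = -1f;
        while (reader.nextKeyValue()) {
            count++;
            final float progress = reader.getProgress();
            assertTrue(progress >= lastProgress);   // progress never moves backwards
            lastProgress = progress;
            writer.write(reader.getCurrentKey(), reader.getCurrentValue());
        }
        return count;
    }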

Also from the CombineFileRecordReader tests: re-initializing an existing reader with a new TaskAttemptContext must propagate the new Configuration into the child record reader. (inputFormat, context1, taskId, and DUMMY_KEY are defined in the elided test setup; the child reader exposes the configured dummy value as its current key.)

    Path[] files = { new Path("file1") };
    long[] lengths = { 1 };

    CombineFileSplit split = new CombineFileSplit(files, lengths);

    RecordReader rr = inputFormat.createRecordReader(split, context1);
    assertTrue("Unexpected RR type!", rr instanceof CombineFileRecordReader);

    // Verify that the initial configuration is the one being used:
    // right after construction, the dummy key should have the value "STATE1".
    assertEquals("Invalid initial dummy key value", "STATE1",
      rr.getCurrentKey().toString());

    // Switch the active context for the RecordReader...
    Configuration conf2 = new Configuration();
    conf2.set(DUMMY_KEY, "STATE2");
    TaskAttemptContext context2 = new TaskAttemptContextImpl(conf2, taskId);
    rr.initialize(split, context2);

    // ...and verify that the new context is propagated into the child record reader.
    assertEquals("Invalid secondary dummy key value", "STATE2",
      rr.getCurrentKey().toString());
  }
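
For this to work, the child reader must pull the dummy value out of the active Configuration both at construction time and again in initialize(). A minimal hedged sketch of such a child (names are illustrative; Hadoop's actual test class differs in detail):

    // Illustrative child reader: its "key" mirrors whatever the active
    // Configuration holds under DUMMY_KEY, re-read on every initialize().
    public static class DummyKeyRecordReader extends RecordReader<Text, Text> {
        private static final String DUMMY_KEY = "dummy.rr.key";   // assumed config name
        private Text key;

        // Constructor shape required by CombineFileRecordReader; reading the
        // config here is why the key is already "STATE1" right after construction.
        public DummyKeyRecordReader(CombineFileSplit split,
                TaskAttemptContext context, Integer index) {
            key = new Text(context.getConfiguration().get(DUMMY_KEY, ""));
        }

        @Override
        public void initialize(InputSplit split, TaskAttemptContext context) {
            // Re-reading the config here is what makes rr.initialize(split, context2)
            // observable through getCurrentKey().
            key = new Text(context.getConfiguration().get(DUMMY_KEY, ""));
        }

        @Override public boolean nextKeyValue() { return false; }   // emits no records
        @Override public Text getCurrentKey() { return key; }
        @Override public Text getCurrentValue() { return null; }
        @Override public float getProgress() { return 1.0f; }
        @Override public void close() { }
    }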

A createRecordReader() implementation for a CombineFileInputFormat subclass: the CombineFileSplit is handed to a CombineFileRecordReader, which loops over the sub-splits and instantiates a child reader for each one. (The enclosing method signature, which receives split and context, is elided.)

    CombineFileSplit combineSplit = (CombineFileSplit) split;

    // Use CombineFileRecordReader, since it can handle CombineFileSplits
    // and instantiate another RecordReader for each sub-split in a loop;
    // here it does so with CombineShimRecordReader.
    RecordReader rr = new CombineFileRecordReader(combineSplit, context,
        CombineShimRecordReader.class);

    return rr;
  }
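
CombineFileRecordReader instantiates its child class reflectively, so the class passed in must expose a constructor taking (CombineFileSplit, TaskAttemptContext, Integer), where the Integer is the index of the sub-split that instance should read. A hypothetical shim along those lines, delegating one sub-split to a standard LineRecordReader (illustrative only, not the actual CombineShimRecordReader):

    // Illustrative shim: adapts sub-split #index of a CombineFileSplit into
    // the single-file FileSplit that a plain LineRecordReader can consume.
    public static class SingleFileShimReader extends RecordReader<LongWritable, Text> {
        private final FileSplit fileSplit;
        private final LineRecordReader delegate = new LineRecordReader();

        // Constructor shape required by CombineFileRecordReader.
        public SingleFileShimReader(CombineFileSplit split,
                TaskAttemptContext context, Integer index) {
            fileSplit = new FileSplit(split.getPath(index), split.getOffset(index),
                    split.getLength(index), null /* locality hints omitted */);
        }

        @Override
        public void initialize(InputSplit ignored, TaskAttemptContext context)
                throws IOException {
            // CombineFileRecordReader passes the whole CombineFileSplit here;
            // the shim substitutes its own single-file split instead.
            delegate.initialize(fileSplit, context);
        }

        @Override public boolean nextKeyValue() throws IOException { return delegate.nextKeyValue(); }
        @Override public LongWritable getCurrentKey() { return delegate.getCurrentKey(); }
        @Override public Text getCurrentValue() { return delegate.getCurrentValue(); }
        @Override public float getProgress() throws IOException { return delegate.getProgress(); }
        @Override public void close() throws IOException { delegate.close(); }
    }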