Package org.apache.hadoop.mapreduce.lib.input

Examples of org.apache.hadoop.mapreduce.lib.input.FileSplit
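The snippets below are test and record-reader excerpts built around FileSplit, most of them wrapping it in a WikipediaInputSplit and reading it with an AggregatingRecordReader. A FileSplit describes the byte range of one file handed to a single map task: its constructor takes the file path, the start offset, the length in bytes, and an optional array of hosts holding the data. A minimal construction sketch (the path and sizes are illustrative, not from the source):

    Path file = new Path("/data/sample.xml");               // hypothetical file
    FileSplit whole = new FileSplit(file, 0L, 1024L, null); // whole file, no host hints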


    conf.set(AggregatingRecordReader.RETURN_PARTIAL_MATCHES, Boolean.toString(false));
    File f = createFile(xml3);
   
    // Create FileSplit
    Path p = new Path(f.toURI().toString());
    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
   
    // Initialize the RecordReader
    AggregatingRecordReader reader = new AggregatingRecordReader();
    reader.initialize(split, ctx);
    assertTrue(reader.nextKeyValue());
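The ctx passed to initialize() above is assumed to come from the elided test fixture. A minimal sketch of building it with the Hadoop 2.x classes TaskAttemptContextImpl and TaskAttemptID (a throwaway task id is enough for a unit test):

    Configuration conf = new Configuration();
    TaskAttemptContext ctx = new TaskAttemptContextImpl(conf, new TaskAttemptID());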


  public void testPartialXML2() throws Exception {
    File f = createFile(xml3);
   
    // Create FileSplit
    Path p = new Path(f.toURI().toString());
    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
   
    // Initialize the RecordReader
    AggregatingRecordReader reader = new AggregatingRecordReader();
    reader.initialize(split, ctx);
    assertTrue(reader.nextKeyValue());

  public void testLineSplitting() throws Exception {
    File f = createFile(xml4);
   
    // Create FileSplit
    Path p = new Path(f.toURI().toString());
    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
   
    // Initialize the RecordReader
    AggregatingRecordReader reader = new AggregatingRecordReader();
    reader.initialize(split, ctx);
    assertTrue(reader.nextKeyValue());

  @Test
  public void testNoEndTokenHandling() throws Exception {
    File f = createFile(xml5);
    // Create FileSplit
    Path p = new Path(f.toURI().toString());
    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
   
    // Initialize the RecordReader
    AggregatingRecordReader reader = new AggregatingRecordReader();
    reader.initialize(split, ctx);
    assertTrue("Not enough records returned.", reader.nextKeyValue());
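This test presumably relies on the RETURN_PARTIAL_MATCHES switch shown in the first snippet: when the input lacks a closing token, the reader can still emit the partial record if the flag is enabled. The inverse of the earlier configuration would be:

    conf.set(AggregatingRecordReader.RETURN_PARTIAL_MATCHES, Boolean.toString(true));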

    Assert.assertNotNull(url);
    File data = new File(url.toURI());
    Path tmpFile = new Path(data.getAbsolutePath());
   
    // Setup the Mapper
    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(tmpFile, 0, fs.pathToFile(tmpFile).length(), null), 0);
    AggregatingRecordReader rr = new AggregatingRecordReader();
    Path ocPath = new Path(tmpFile, "oc");
    OutputCommitter oc = new FileOutputCommitter(ocPath, context);
    fs.deleteOnExit(ocPath);
    StandaloneStatusReporter sr = new StandaloneStatusReporter();
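The fs, context, and url identifiers above come from the elided fixture code. Since pathToFile() is only defined on the local file system, a plausible setup looks like this (the resource name is hypothetical):

    Configuration conf = new Configuration();
    LocalFileSystem fs = FileSystem.getLocal(conf);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    URL url = getClass().getResource("/wikipedia-sample.xml"); // hypothetical test resource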

  public void testSerialization() throws IOException {
    Path testPath = new Path("/foo/bar");
    String[] hosts = new String[] {"abcd", "efgh"};
    FileSplit fSplit = new FileSplit(testPath, 1, 2, hosts);
    WikipediaInputSplit split = new WikipediaInputSplit(fSplit, 7);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ObjectOutputStream out = new ObjectOutputStream(baos);
    split.write(out);
    out.close();
    baos.close();
   
    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
    DataInput in = new ObjectInputStream(bais);
   
    WikipediaInputSplit split2 = new WikipediaInputSplit();
    split2.readFields(in);
    Assert.assertEquals(0, bais.available());
    bais.close();
   
    Assert.assertEquals(split.getPartition(), split2.getPartition());
   
    FileSplit fSplit2 = split2.getFileSplit();
    Assert.assertEquals(fSplit.getPath(), fSplit2.getPath());
    Assert.assertEquals(fSplit.getStart(), fSplit2.getStart());
    Assert.assertEquals(fSplit.getLength(), fSplit2.getLength());
   
    String[] hosts2 = fSplit2.getLocations();
    Assert.assertArrayEquals(hosts, hosts2);
  }
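Because write() and readFields() only require the DataOutput and DataInput interfaces, the ObjectOutputStream above is heavier than needed; a plain DataOutputStream is the usual Writable round-trip idiom. A sketch under that assumption:

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(baos);
    split.write(out);                                    // serialize via the Writable contract
    out.close();
    DataInput in = new DataInputStream(new ByteArrayInputStream(baos.toByteArray()));
    WikipediaInputSplit copy = new WikipediaInputSplit();
    copy.readFields(in);                                 // rebuild from the raw bytes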

    Assert.assertNotNull(url);
    File data = new File(url.toURI());
    Path tmpFile = new Path(data.getAbsolutePath());
   
    // Setup the Mapper
    InputSplit split = new FileSplit(tmpFile, 0, fs.pathToFile(tmpFile).length(), null);
    AggregatingRecordReader rr = new AggregatingRecordReader();
    Path ocPath = new Path(tmpFile, "oc");
    OutputCommitter oc = new FileOutputCommitter(ocPath, context);
    fs.deleteOnExit(ocPath);
    StandaloneStatusReporter sr = new StandaloneStatusReporter();

    }

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
      if (first) {
        // Emit the input file's name exactly once per task, keyed by the
        // first record's offset.
        FileSplit split = (FileSplit) context.getInputSplit();
        Path path = split.getPath(); // current split path
        lvalue.set(path.getName());
        context.write(key, lvalue);

        first = false;
      }
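The cast in this map() assumes the job always delivers plain FileSplits; when splits are wrapped (for example by MultipleInputs), getInputSplit() returns a different type and the cast fails. A defensive variant of the same logic, sketched rather than taken from the source:

      InputSplit raw = context.getInputSplit();
      if (raw instanceof FileSplit) {
        lvalue.set(((FileSplit) raw).getPath().getName()); // emit the source file name once
        context.write(key, lvalue);
      }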

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException,
        InterruptedException {

        FileSplit fSplit = (FileSplit) split;
        Path path = fSplit.getPath();
        Configuration conf = context.getConfiguration();
        this.in = new RCFile.Reader(path.getFileSystem(conf), path, conf);
        this.end = fSplit.getStart() + fSplit.getLength();

        // Skip to the first sync point at or after the split's start so that
        // records straddling the boundary are read by exactly one reader.
        if (fSplit.getStart() > in.getPosition()) {
            in.sync(fSplit.getStart());
        }

        this.start = in.getPosition();
        more = start < end;
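initialize() leaves the reader positioned on a whole record, so a matching read step only has to watch the split boundary. A sketch of that step, assuming Hive's RCFile.Reader API (next(LongWritable) and lastSeenSyncPos(), as used by RCFileRecordReader):

        protected boolean next(LongWritable key) throws IOException {
          if (!more) {
            return false;
          }
          more = in.next(key);                       // advance to the next row
          if (more && in.lastSeenSyncPos() >= end) {
            more = false;                            // that row belongs to the next split's reader
          }
          return more;
        }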

        long pos = 0;
        int n;
        try {
          // One FileSplit per line: the split's offset is the running byte
          // position and its length is the bytes consumed for that line.
          while ((n = reader.readLine(key)) > 0) {
            String[] hosts = getStoreDirHosts(fs, path);
            splits.add(new FileSplit(path, pos, n, hosts));
            pos += n;
          }
        } finally {
          reader.close();
        }
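Each emitted split covers exactly one line of the file, so a consumer can seek straight to it. A sketch against the standard FileSystem API (fileSplit stands for one of the splits built above):

    FSDataInputStream stream = fs.open(fileSplit.getPath());
    stream.seek(fileSplit.getStart());                    // jump to this line's offset
    byte[] line = new byte[(int) fileSplit.getLength()];
    stream.readFully(line);                               // the split spans one whole line
    stream.close();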
