Package org.apache.hadoop.mapreduce.lib.input

Examples of org.apache.hadoop.mapreduce.lib.input.FileSplit
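The snippets below are test and record-reader excerpts built around FileSplit, most of them wrapping it in a WikipediaInputSplit and reading it with an AggregatingRecordReader. A FileSplit describes the byte range of one file handed to a single map task: its constructor takes the file path, the start offset, the length in bytes, and an optional array of hosts holding the data. A minimal construction sketch (the path and sizes are illustrative, not from the source):

    Path file = new Path("/data/sample.xml");               // hypothetical file
    FileSplit whole = new FileSplit(file, 0L, 1024L, null); // whole file, no host hints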


    conf.set(AggregatingRecordReader.RETURN_PARTIAL_MATCHES, Boolean.toString(false));
    File f = createFile(xml3);
   
    // Create FileSplit
    Path p = new Path(f.toURI().toString());
    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
   
    // Initialize the RecordReader
    AggregatingRecordReader reader = new AggregatingRecordReader();
    reader.initialize(split, ctx);
    assertTrue(reader.nextKeyValue());
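The ctx passed to initialize() above is assumed to come from the elided test fixture. A minimal sketch of building it with the Hadoop 2.x classes TaskAttemptContextImpl and TaskAttemptID (a throwaway task id is enough for a unit test):

    Configuration conf = new Configuration();
    TaskAttemptContext ctx = new TaskAttemptContextImpl(conf, new TaskAttemptID());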


  public void testPartialXML2() throws Exception {
    File f = createFile(xml3);
   
    // Create FileSplit
    Path p = new Path(f.toURI().toString());
    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
   
    // Initialize the RecordReader
    AggregatingRecordReader reader = new AggregatingRecordReader();
    reader.initialize(split, ctx);
    assertTrue(reader.nextKeyValue());

  public void testLineSplitting() throws Exception {
    File f = createFile(xml4);
   
    // Create FileSplit
    Path p = new Path(f.toURI().toString());
    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
   
    // Initialize the RecordReader
    AggregatingRecordReader reader = new AggregatingRecordReader();
    reader.initialize(split, ctx);
    assertTrue(reader.nextKeyValue());

  @Test
  public void testNoEndTokenHandling() throws Exception {
    File f = createFile(xml5);
    // Create FileSplit
    Path p = new Path(f.toURI().toString());
    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
   
    // Initialize the RecordReader
    AggregatingRecordReader reader = new AggregatingRecordReader();
    reader.initialize(split, ctx);
    assertTrue("Not enough records returned.", reader.nextKeyValue());
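This test presumably relies on the RETURN_PARTIAL_MATCHES switch shown in the first snippet: when the input lacks a closing token, the reader can still emit the partial record if the flag is enabled. The inverse of the earlier configuration would be:

    conf.set(AggregatingRecordReader.RETURN_PARTIAL_MATCHES, Boolean.toString(true));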

    Assert.assertNotNull(url);
    File data = new File(url.toURI());
    Path tmpFile = new Path(data.getAbsolutePath());
   
    // Setup the Mapper
    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(tmpFile, 0, fs.pathToFile(tmpFile).length(), null), 0);
    AggregatingRecordReader rr = new AggregatingRecordReader();
    Path ocPath = new Path(tmpFile, "oc");
    OutputCommitter oc = new FileOutputCommitter(ocPath, context);
    fs.deleteOnExit(ocPath);
    StandaloneStatusReporter sr = new StandaloneStatusReporter();
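The fs, context, and url identifiers above come from the elided fixture code. Since pathToFile() is only defined on the local file system, a plausible setup looks like this (the resource name is hypothetical):

    Configuration conf = new Configuration();
    LocalFileSystem fs = FileSystem.getLocal(conf);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    URL url = getClass().getResource("/wikipedia-sample.xml"); // hypothetical test resource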

  public void testSerialization() throws IOException {
    Path testPath = new Path("/foo/bar");
    String[] hosts = new String[] {"abcd", "efgh"};
    FileSplit fSplit = new FileSplit(testPath, 1, 2, hosts);
    WikipediaInputSplit split = new WikipediaInputSplit(fSplit, 7);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ObjectOutputStream out = new ObjectOutputStream(baos);
    split.write(out);
    out.close();
    baos.close();
   
    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
    DataInput in = new ObjectInputStream(bais);
   
    WikipediaInputSplit split2 = new WikipediaInputSplit();
    split2.readFields(in);
    Assert.assertEquals(0, bais.available());
    bais.close();
   
    Assert.assertEquals(split.getPartition(), split2.getPartition());
   
    FileSplit fSplit2 = split2.getFileSplit();
    Assert.assertEquals(fSplit.getPath(), fSplit2.getPath());
    Assert.assertEquals(fSplit.getStart(), fSplit2.getStart());
    Assert.assertEquals(fSplit.getLength(), fSplit2.getLength());
   
    String[] hosts2 = fSplit2.getLocations();
    Assert.assertArrayEquals(hosts, hosts2);
  }
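Because write() and readFields() only require the DataOutput and DataInput interfaces, the ObjectOutputStream above is heavier than needed; a plain DataOutputStream is the usual Writable round-trip idiom. A sketch under that assumption:

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(baos);
    split.write(out);                                    // serialize via the Writable contract
    out.close();
    DataInput in = new DataInputStream(new ByteArrayInputStream(baos.toByteArray()));
    WikipediaInputSplit copy = new WikipediaInputSplit();
    copy.readFields(in);                                 // rebuild from the raw bytes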

    Assert.assertNotNull(url);
    File data = new File(url.toURI());
    Path tmpFile = new Path(data.getAbsolutePath());
   
    // Setup the Mapper
    InputSplit split = new FileSplit(tmpFile, 0, fs.pathToFile(tmpFile).length(), null);
    AggregatingRecordReader rr = new AggregatingRecordReader();
    Path ocPath = new Path(tmpFile, "oc");
    OutputCommitter oc = new FileOutputCommitter(ocPath, context);
    fs.deleteOnExit(ocPath);
    StandaloneStatusReporter sr = new StandaloneStatusReporter();

    }

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
      if (first) {
        // Emit the input file's name exactly once per task, keyed by the
        // first record's offset.
        FileSplit split = (FileSplit) context.getInputSplit();
        Path path = split.getPath(); // current split path
        lvalue.set(path.getName());
        context.write(key, lvalue);

        first = false;
      }
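The cast in this map() assumes the job always delivers plain FileSplits; when splits are wrapped (for example by MultipleInputs), getInputSplit() returns a different type and the cast fails. A defensive variant of the same logic, sketched rather than taken from the source:

      InputSplit raw = context.getInputSplit();
      if (raw instanceof FileSplit) {
        lvalue.set(((FileSplit) raw).getPath().getName()); // emit the source file name once
        context.write(key, lvalue);
      }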

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException,
        InterruptedException {

        FileSplit fSplit = (FileSplit) split;
        Path path = fSplit.getPath();
        Configuration conf = context.getConfiguration();
        this.in = new RCFile.Reader(path.getFileSystem(conf), path, conf);
        this.end = fSplit.getStart() + fSplit.getLength();

        // Skip to the first sync point at or after the split's start so that
        // records straddling the boundary are read by exactly one reader.
        if (fSplit.getStart() > in.getPosition()) {
            in.sync(fSplit.getStart());
        }

        this.start = in.getPosition();
        more = start < end;
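initialize() leaves the reader positioned on a whole record, so a matching read step only has to watch the split boundary. A sketch of that step, assuming Hive's RCFile.Reader API (next(LongWritable) and lastSeenSyncPos(), as used by RCFileRecordReader):

        protected boolean next(LongWritable key) throws IOException {
          if (!more) {
            return false;
          }
          more = in.next(key);                       // advance to the next row
          if (more && in.lastSeenSyncPos() >= end) {
            more = false;                            // that row belongs to the next split's reader
          }
          return more;
        }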

        long pos = 0;
        int n;
        try {
          // One FileSplit per line: the split's offset is the running byte
          // position and its length is the bytes consumed for that line.
          while ((n = reader.readLine(key)) > 0) {
            String[] hosts = getStoreDirHosts(fs, path);
            splits.add(new FileSplit(path, pos, n, hosts));
            pos += n;
          }
        } finally {
          reader.close();
        }
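Each emitted split covers exactly one line of the file, so a consumer can seek straight to it. A sketch against the standard FileSystem API (fileSplit stands for one of the splits built above):

    FSDataInputStream stream = fs.open(fileSplit.getPath());
    stream.seek(fileSplit.getStart());                    // jump to this line's offset
    byte[] line = new byte[(int) fileSplit.getLength()];
    stream.readFully(line);                               // the split spans one whole line
    stream.close();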
