Package org.apache.hadoop.mapred

Examples of org.apache.hadoop.mapred.FileSplit
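A FileSplit describes a byte range of a single file assigned to one map task. As a minimal sketch (not taken from the examples below; the path is hypothetical), constructing a split and reading back its extent looks like this:

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.FileSplit;

    public class FileSplitDemo {
      public static void main(String[] args) {
        Path file = new Path("/data/example.seq");           // hypothetical path
        // Split covering bytes [0, 1024) of the file; no host location hints.
        FileSplit split = new FileSplit(file, 0L, 1024L, (String[]) null);
        System.out.println(split.getPath());    // /data/example.seq
        System.out.println(split.getStart());   // 0
        System.out.println(split.getLength());  // 1024
      }
    }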


      for (; sl.next(key, value); last = sl.getPosition()) {
        // If adding this entry would push the current split past the target
        // size, cut the split here and start the next entry in a new split.
        if (acc + key.get() > targetsize && acc != 0) {
          long splitsize = last - pos;
          splits.add(new FileSplit(src, pos, splitsize, job));
          cbrem -= splitsize;
          pos = last;
          acc = 0L;
        }
        acc += key.get();
      }
      if (cbrem != 0) {
        splits.add(new FileSplit(src, pos, cbrem, job));
      }

      return splits.toArray(new FileSplit[splits.size()]);
    }
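The loop above arrives mid-method, so the variables it uses are declared earlier in the (truncated) source. A plausible reconstruction of that setup, assuming the DistCp-style file-list splitting this resembles, is:

      // Hypothetical declarations, for context only -- not from the original source.
      // fs, src, job, totalBytes, and numSplits are assumed to exist in scope.
      SequenceFile.Reader sl = new SequenceFile.Reader(fs, src, job);
      LongWritable key = new LongWritable();    // per-entry byte count
      Text value = new Text();                  // per-entry payload (e.g. a file name)
      long pos = 0L;                            // start offset of the current split
      long last = 0L;                           // position after the last entry read
      long acc = 0L;                            // bytes accumulated in the current split
      long cbrem = totalBytes;                  // bytes remaining to assign (hypothetical)
      long targetsize = totalBytes / numSplits; // desired bytes per split (hypothetical)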


        Path jsonFilePath = new Path(jsonDataFile);
        FileStatus jsonFileStatus = jsonFilePath.getFileSystem(new Configuration())
                                                .listStatus(jsonFilePath)[0];
        // Read the whole file as a single split: offset 0, length = file size,
        // no host location hints.
        final SequenceFileRecordReader<BytesWritable, BytesWritable> reader =
            new SequenceFileRecordReader<BytesWritable, BytesWritable>(
                new Configuration(),
                new FileSplit(jsonFilePath, 0, jsonFileStatus.getLen(), (String[]) null));

        PerformanceTest readWriteTest = new PerformanceTest() {
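The example truncates inside the PerformanceTest, but a minimal sketch of how such a reader is typically driven (an assumed continuation, not the original body) is:

        BytesWritable key = reader.createKey();
        BytesWritable value = reader.createValue();
        while (reader.next(key, value)) {
          // consume one record per iteration
        }
        reader.close();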

  @Override
  public RecordReader<NullWritable, OrcLazyRow>
      getRecordReader(InputSplit inputSplit, JobConf conf,
                      Reporter reporter) throws IOException {
    ReaderWriterProfiler.setProfilerOptions(conf);
    FileSplit fileSplit = (FileSplit) inputSplit;
    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem(conf);
    reporter.setStatus(fileSplit.toString());

    try {
      return new OrcRecordReader(
          OrcFile.createReader(fs, path, conf),
          conf,
          fileSplit.getStart(),
          fileSplit.getLength());
    } catch (IndexOutOfBoundsException e) {
      /*
       * When a non-ORC file is read by the ORC reader, creating the reader
       * throws IndexOutOfBoundsException. Catch that exception and check the
       * file header to see whether the input file is ORC; if it is not,
       * throw a NotAnORCFileException naming the file ...
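The cast from the generic InputSplit to FileSplit, followed by handing the split's path, start, and length to a format-specific reader, is the standard pattern in mapred input formats. For comparison, TextInputFormat's getRecordReader does essentially this:

    public RecordReader<LongWritable, Text> getRecordReader(
        InputSplit genericSplit, JobConf job, Reporter reporter) throws IOException {
      reporter.setStatus(genericSplit.toString());
      return new LineRecordReader(job, (FileSplit) genericSplit);
    }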

      try {
        reader = new SequenceFile.Reader(fs, src, jconf);
        while(reader.next(key, value)) {
          if (currentCount + key.get() > targetSize && currentCount != 0){
            long size = lastPos - startPos;
            splits.add(new FileSplit(src, startPos, size, (String[]) null));
            remaining = remaining - size;
            startPos = lastPos;
            currentCount = 0L;
          }
          currentCount += key.get();
          lastPos = reader.getPosition();
        }
        // Whatever remains (typically smaller than the target size) becomes the final split.
        if (remaining != 0) {
          splits.add(new FileSplit(src, startPos, remaining, (String[])null));
        }
      }
      finally {
        if (reader != null) {
          reader.close();
        }
      }
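This variant implements the same accumulate-and-cut logic as the first example, but wraps the reader in try/finally and builds its FileSplits with (String[]) null host hints rather than the deprecated JobConf constructor.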

        for (; sl.next(key, value); last = sl.getPosition()) {
          // If adding this entry would push the current split past the target
          // size, cut the split here and start the next entry in a new split.
          if (acc + key.get() > targetsize && acc != 0) {
            long splitsize = last - pos;
            splits.add(new FileSplit(src, pos, splitsize, (String[])null));
            cbrem -= splitsize;
            pos = last;
            acc = 0L;
          }
          acc += key.get();
        }
      }
      finally {
        checkAndClose(sl);
      }
      if (cbrem != 0) {
        splits.add(new FileSplit(src, pos, cbrem, (String[])null));
      }

      return splits.toArray(new FileSplit[splits.size()]);
    }

        long[] starts = new long[inputSplits.length];
        long[] lengths = new long[inputSplits.length];
        Path[] paths = new Path[inputSplits.length];
        for (int j=0; j<inputSplits.length; j++) {
          FileSplit fileSplit = (FileSplit) inputSplits[j];
          Path p = fileSplit.getPath();
          long start = fileSplit.getStart();
          long length = fileSplit.getLength();

          starts[j] = start;
          lengths[j] = length;
          paths[j] = p;
        }
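These parallel arrays capture each FileSplit's path, start offset, and length, presumably so the caller can assemble them into a composite split spanning several files; the longer example at the end of this page shows the same pattern in fuller context.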

      @Override
      public void map(LongWritable key, Text value,
          OutputCollector<BytesWritable, Tuple> output, Reporter reporter)
          throws IOException {
        if (filePath == null) {
          FileSplit split = (FileSplit) reporter.getInputSplit();
          filePath = split.getPath().toString();
        }
        String line = value.toString();
        StringTokenizer st = new StringTokenizer(line, " ");
        while (st.hasMoreElements()) {
          byte[] word = st.nextToken().getBytes(); // note: uses the platform default charset
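Reporter.getInputSplit() returns the split the current map task is processing; the cast to FileSplit is only safe for file-based input formats. The next example shows the complementary trick of rebuilding a FileSplit from job configuration properties.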

      return null;
    }
    Path p = new Path(path);
    long start = Long.parseLong(job.get("map.input.start"));
    long length = Long.parseLong(job.get("map.input.length"));
    return new FileSplit(p, start, length, job);
  }
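The map.input.start and map.input.length properties are set by the framework for file-based splits, alongside map.input.file, which presumably supplied the path variable above; together they are enough to reconstruct the task's FileSplit.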

      int tableIndex = 0, fileNumber = 0;
      Integer[] fileNumbers = helper.getFileNumbers();
      if (fileNumbers.length != realReaders.size())
        throw new IOException("Number of tables in input paths of input splits is incorrect.");
      for (int j=0; j<inputSplits.length; j++) {
        FileSplit fileSplit = (FileSplit) inputSplits[j];
        start = fileSplit.getStart();
        if (start <= prevStart)
        {
          fileNumber++;
          if (fileNumber >= fileNumbers[tableIndex])
          {
            inputSplitBoundaries[tableIndex++] = j;
            fileNumber = 0;
          }
        }
        prevStart = start;
      }
      inputSplitBoundaries[tableIndex++] = inputSplits.length;
      if (tableIndex != realReaders.size())
        throw new IOException("Number of tables in input splits is incorrect.");
      for (tableIndex = 0; tableIndex < realReaders.size(); tableIndex++)
      {
        int startSplitIndex = (tableIndex == 0 ? 0 : inputSplitBoundaries[tableIndex - 1]);
        int splitLen = (tableIndex == 0 ? inputSplitBoundaries[0] :
            inputSplitBoundaries[tableIndex] - inputSplitBoundaries[tableIndex-1]);
        BasicTable.Reader reader = realReaders.get(tableIndex);
        /* Get the index of the column group that will be used for row-split. */
        int splitCGIndex = reader.getRowSplitCGIndex();
       
        long[] starts = new long[splitLen];
        long[] lengths = new long[splitLen];
        int[] batches = new int[splitLen + 1];
        batches[0] = 0;
        int numBatches = 0;
        int batchSize = 0;
        Path[] paths = new Path[splitLen];
        long totalLen = 0;
        final double SPLIT_SLOP = 1.1;
        int endSplitIndex = startSplitIndex + splitLen;
        for (int j=startSplitIndex; j< endSplitIndex; j++) {
          FileSplit fileSplit = (FileSplit) inputSplits[j];
          Path p = fileSplit.getPath();
          long blockSize = p.getFileSystem(conf).getBlockSize(p);
          long splitSize = (long) (helper.computeSplitSize(goalSize, minSize, blockSize) * SPLIT_SLOP);
          start = fileSplit.getStart();
          long length = fileSplit.getLength();
          int index = j - startSplitIndex;
          starts[index] = start;
          lengths[index] = length;
          totalLen += length;
          paths[index] = p;
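The helper's computeSplitSize presumably mirrors FileInputFormat's rule, and SPLIT_SLOP = 1.1 lets a batch overshoot the target by 10% before a new one is cut. For reference, the FileInputFormat version is:

      protected long computeSplitSize(long goalSize, long minSize, long blockSize) {
        return Math.max(minSize, Math.min(goalSize, blockSize));
      }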
