
Examples of org.apache.hadoop.mapred.FileSplit
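
The snippets below are collected from real projects. As a primer, here is a minimal, self-contained sketch of the FileSplit API itself (the path is a placeholder): a FileSplit names a file, a starting byte offset, a length in bytes, and optionally the hosts that hold that byte range.

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.FileSplit;

    public class FileSplitBasics {
      public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example.seq"); // placeholder path
        FileSystem fs = path.getFileSystem(conf);
        FileStatus status = fs.getFileStatus(path);

        // One split spanning the whole file; an empty host array means
        // no locality preference.
        FileSplit split = new FileSplit(path, 0, status.getLen(), new String[0]);

        System.out.println(split.getPath() + " @" + split.getStart()
            + " +" + split.getLength());
      }
    }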


      // Fragment from the surrounding test: initialize the input function,
      // then build a split covering the entire ORC file.
      inputFn.initialize();

      FileStatus status = fs.getFileStatus(path);
      FileSplit split = new FileSplit(path, 0, status.getLen(), new String[0]);

      // Optionally narrow the read to a subset of columns via Hive's
      // column-projection settings.
      JobConf conf = new JobConf();
      if (readColumns != null) {
        conf.setBoolean(OrcFileSource.HIVE_READ_ALL_COLUMNS, false);
        conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR,
            OrcFileSource.getColumnIdsStr(readColumns));
      }
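
With the split and JobConf prepared, the usual next step in the old mapred API is to ask an InputFormat for a RecordReader. A generic sketch of that contract (the specific format, e.g. the ORC input format used by OrcFileSource, is an assumption not shown in the excerpt):

    // Consume a FileSplit through any org.apache.hadoop.mapred.InputFormat:
    // obtain a RecordReader and iterate key/value pairs until exhausted.
    // (imports: org.apache.hadoop.mapred.*, java.io.IOException)
    static <K, V> void readAll(InputFormat<K, V> fmt, InputSplit split, JobConf conf)
        throws IOException {
      RecordReader<K, V> rr = fmt.getRecordReader(split, conf, Reporter.NULL);
      try {
        K key = rr.createKey();
        V value = rr.createValue();
        while (rr.next(key, value)) {
          // process one record
        }
      } finally {
        rr.close();
      }
    }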


      // Walk the flat array of input splits and find, for each table, the
      // boundary index where that table's splits end. A new file begins
      // whenever a split's start offset does not advance.
      int tableIndex = 0, fileNumber = 0;
      Integer[] fileNumbers = helper.getFileNumbers();
      if (fileNumbers.length != realReaders.size())
        throw new IOException("Number of tables in input paths of input splits is incorrect.");
      for (int j = 0; j < inputSplits.length; j++) {
        FileSplit fileSplit = (FileSplit) inputSplits[j];
        start = fileSplit.getStart();
        if (start <= prevStart)
        {
          fileNumber++;
          if (fileNumber >= fileNumbers[tableIndex])
          {
            inputSplitBoundaries[tableIndex++] = j;
            fileNumber = 0;
          }
        }
        prevStart = start;
      }
      inputSplitBoundaries[tableIndex++] = inputSplits.length;
      if (tableIndex != realReaders.size())
        throw new IOException("Number of tables in input splits is incorrect.");
      for (tableIndex = 0; tableIndex < realReaders.size(); tableIndex++)
      {
        int startSplitIndex = (tableIndex == 0 ? 0 : inputSplitBoundaries[tableIndex - 1]);
        int splitLen = (tableIndex == 0 ? inputSplitBoundaries[0] :
            inputSplitBoundaries[tableIndex] - inputSplitBoundaries[tableIndex - 1]);
        BasicTable.Reader reader = realReaders.get(tableIndex);
        /* Get the index of the column group that will be used for row-split. */
        int splitCGIndex = reader.getRowSplitCGIndex();

        long[] starts = new long[splitLen];
        long[] lengths = new long[splitLen];
        int[] batches = new int[splitLen + 1];
        batches[0] = 0;
        int numBatches = 0;
        int batchSize = 0;
        Path[] paths = new Path[splitLen];
        long totalLen = 0;
        // Allow a split to exceed the target size by up to 10%.
        final double SPLIT_SLOP = 1.1;
        int endSplitIndex = startSplitIndex + splitLen;
        for (int j = startSplitIndex; j < endSplitIndex; j++) {
          FileSplit fileSplit = (FileSplit) inputSplits[j];
          Path p = fileSplit.getPath();
          long blockSize = p.getFileSystem(conf).getBlockSize(p);
          long splitSize = (long) (helper.computeSplitSize(goalSize, minSize, blockSize) * SPLIT_SLOP);
          start = fileSplit.getStart();
          long length = fileSplit.getLength();
          int index = j - startSplitIndex;
          starts[index] = start;
          lengths[index] = length;
          totalLen += length;
          paths[index] = p;
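
The helper.computeSplitSize call above presumably delegates to the stock FileInputFormat sizing rule (the delegation is an assumption; the formula itself is standard Hadoop):

    // Aim for goalSize per split, but never exceed the block size and
    // never fall below the configured minimum.
    static long computeSplitSize(long goalSize, long minSize, long blockSize) {
      return Math.max(minSize, Math.min(goalSize, blockSize));
    }

The SPLIT_SLOP factor of 1.1 then lets a split run up to 10% over that size rather than producing a tiny trailing split.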

          // If adding this record's increment would push the accumulated
          // size past the target, cut the split at the last record boundary
          // and start the next split there.
          long increment = getIncrement(key, value);
          if (acc + increment > targetsize && acc != 0) {
            long splitsize = last - pos;
            splits.add(new FileSplit(src, pos, splitsize, (String[])null));
            cbrem -= splitsize;
            pos = last;
            acc = 0L;
          }
          acc += increment;
        }
      }
      finally {
        checkAndClose(sl);
      }
      if (cbrem != 0) {
        splits.add(new FileSplit(src, pos, cbrem, (String[])null));
      }

      return splits.toArray(new FileSplit[splits.size()]);
    }
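
Because the fragment above begins mid-loop, here is a self-contained sketch of the same accumulate-and-cut pattern (all names and the increments array are illustrative assumptions, not the original code):

    import java.util.ArrayList;
    import java.util.List;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.FileSplit;

    // Cut a file into FileSplits of roughly targetSize bytes, advancing
    // only at the record boundaries given by increments[].
    static List<FileSplit> cutSplits(Path src, long[] increments, long targetSize) {
      List<FileSplit> splits = new ArrayList<FileSplit>();
      long pos = 0; // start offset of the split being built
      long acc = 0; // bytes accumulated in the current split
      for (long increment : increments) {
        if (acc + increment > targetSize && acc != 0) {
          splits.add(new FileSplit(src, pos, acc, (String[]) null));
          pos += acc;
          acc = 0;
        }
        acc += increment;
      }
      if (acc != 0) { // the remainder becomes the final split
        splits.add(new FileSplit(src, pos, acc, (String[]) null));
      }
      return splits;
    }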

      // Rebuild FileSplits from a persisted list of (start, length) pairs
      // stored in a SequenceFile.
      try {
        sl = new SequenceFile.Reader(splitListFs, splitListPath, job);
        LongWritable startpos = new LongWritable();
        LongWritable length = new LongWritable();
        while (sl.next(startpos, length)) {
          splits.add(new FileSplit(srcFileListPath, startpos.get(),
              length.get(), (String[]) null));
        }
      }
      finally {
        checkAndClose(sl);
      }
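
For context, the split list consumed above could be produced with a matching SequenceFile.Writer; a hedged sketch (the writer is not part of the original excerpt and the offsets are placeholders):

    // Persist (start, length) pairs describing precomputed splits, using
    // the same key/value types the reader expects.
    SequenceFile.Writer writer = SequenceFile.createWriter(
        splitListFs, job, splitListPath, LongWritable.class, LongWritable.class);
    try {
      writer.append(new LongWritable(0L), new LongWritable(64L * 1024 * 1024));
    } finally {
      writer.close();
    }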

      // Walk a SequenceFile whose keys are per-record sizes, cutting a new
      // split whenever the running count would exceed the target size.
      try {
        reader = new SequenceFile.Reader(fs, src, jconf);
        while (reader.next(key, value)) {
          if (currentCount + key.get() > targetSize && currentCount != 0) {
            long size = lastPos - startPos;
            splits.add(new FileSplit(src, startPos, size, (String[]) null));
            remaining = remaining - size;
            startPos = lastPos;
            currentCount = 0L;
          }
          currentCount += key.get();
          lastPos = reader.getPosition();
        }
        // Whatever remains becomes the final split, even if it is smaller
        // than the target size.
        if (remaining != 0) {
          splits.add(new FileSplit(src, startPos, remaining, (String[]) null));
        }
      }
      finally {
        if (reader != null) { // guard: the Reader constructor may have thrown
          reader.close();
        }
      }

        // Cut the source SequenceFile into splits of roughly targetcount
        // records each, tracking byte positions as we go.
        for (in = new SequenceFile.Reader(fs, srcs, job); in.next(key, value);) {
          long curr = in.getPosition();
          long delta = curr - prev;
          if (++count > targetcount) {
            count = 0;
            splits.add(new FileSplit(srcs, prev, delta, (String[]) null));
            prev = curr;
          }
        }
      } finally {
        if (in != null) { // guard: the Reader constructor may have thrown
          in.close();
        }
      }
      // Whatever is left of the file becomes the final split.
      long remaining = fs.getFileStatus(srcs).getLen() - prev;
      if (remaining != 0) {
        splits.add(new FileSplit(srcs, prev, remaining, (String[]) null));
      }
      LOG.info("jobname=" + jobName + " numSplits=" + numSplits +
               ", splits.size()=" + splits.size());
      return splits.toArray(new FileSplit[splits.size()]);
    }
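
The targetcount variable is defined outside the excerpt; presumably it is derived from the requested number of splits, along these lines (an assumption, not the original code):

    // One split per roughly totalRecords / numSplits records; the lower
    // bound of 1 avoids a degenerate zero-record target.
    long targetcount = Math.max(1L, totalRecords / numSplits);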



    // Read one Avro output file back through AvroRecordReader, treating the
    // whole file as a single split.
    AvroJob.setInputSchema(job, readerSchema);

    String dir = System.getProperty("test.dir", ".") + "/mapred";
    Path inputPath = new Path(dir + "/out" + "/myavro-r-00000.avro");
    FileStatus fileStatus = FileSystem.get(job).getFileStatus(inputPath);
    FileSplit fileSplit = new FileSplit(inputPath, 0, fileStatus.getLen(), job);

    AvroRecordReader<Pair<Integer, Long>> recordReader =
        new AvroRecordReader<Pair<Integer, Long>>(job, fileSplit);

    AvroWrapper<Pair<Integer, Long>> inputPair = new AvroWrapper<Pair<Integer, Long>>(null);

    // Same pattern as above, reading a different test output file.
    AvroJob.setInputSchema(job, readerSchema);

    String dir = System.getProperty("test.dir", ".") + "/mapred";
    Path inputPath = new Path(dir + "/out" + "/testavrofile-r-00000.avro");
    FileStatus fileStatus = FileSystem.get(job).getFileStatus(inputPath);
    FileSplit fileSplit = new FileSplit(inputPath, 0, fileStatus.getLen(), job);

    AvroRecordReader<Pair<Integer, Long>> recordReader =
        new AvroRecordReader<Pair<Integer, Long>>(job, fileSplit);

    AvroWrapper<Pair<Integer, Long>> inputPair = new AvroWrapper<Pair<Integer, Long>>(null);
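
A sketch of how the reader constructed above is typically driven (hedged; the processing body is a placeholder, and datum() unwraps the AvroWrapper):

    // Iterate the Avro records in the split; the key wrapper carries the
    // data, the NullWritable value is ignored.
    NullWritable ignore = NullWritable.get();
    while (recordReader.next(inputPair, ignore)) {
      Pair<Integer, Long> record = inputPair.datum();
      // record.key() / record.value() hold the pair's fields
    }
    recordReader.close();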


