Package org.apache.hadoop.mapred

Examples of org.apache.hadoop.mapred.FileSplit
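The excerpts below are drawn from open-source projects (Avro's mapred tests, Hadoop's distcp tooling, Hive, and Nutch, judging by the identifiers) and show both sides of org.apache.hadoop.mapred.FileSplit: producers that carve files into (path, start, length) ranges, and consumers that turn a split back into a stream of records.

The first excerpt appears to be from an Avro mapred test: it wraps an entire reducer output file in a single FileSplit and hands it to an AvroRecordReader.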


    AvroJob.setInputSchema(job, readerSchema);

    String dir = System.getProperty("test.dir", ".") + "/mapred";
    Path inputPath = new Path(dir + "/out" + "/myavro1-r-00000.avro");
    FileStatus fileStatus = FileSystem.get(job).getFileStatus(inputPath);
    // One FileSplit spanning the whole file: offset 0, length = file size.
    FileSplit fileSplit = new FileSplit(inputPath, 0, fileStatus.getLen(), job);
    AvroWrapper<Utf8> inputPair = new AvroWrapper<Utf8>(null);
    NullWritable ignore = NullWritable.get();
    AvroRecordReader<Utf8> recordReader = new AvroRecordReader<Utf8>(job, fileSplit);
    long sumOfCounts = 0;
    long numOfCounts = 0;
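The excerpt stops just before its read loop. Below is a minimal, self-contained sketch of the complete pattern; the input path is a placeholder, and it assumes an Avro container file whose records are strings, so each datum deserializes as a Utf8:

import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroJob;
import org.apache.avro.mapred.AvroRecordReader;
import org.apache.avro.mapred.AvroWrapper;
import org.apache.avro.util.Utf8;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;

public class ReadWholeAvroFile {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf();
    // The reader schema must be registered before the reader is built.
    AvroJob.setInputSchema(job, Schema.create(Schema.Type.STRING));

    Path inputPath = new Path(args[0]); // placeholder: an Avro data file of strings
    FileStatus status = FileSystem.get(job).getFileStatus(inputPath);

    // One FileSplit spanning the whole file: offset 0, length = file size.
    FileSplit split = new FileSplit(inputPath, 0, status.getLen(), job);

    AvroRecordReader<Utf8> reader = new AvroRecordReader<Utf8>(job, split);
    AvroWrapper<Utf8> wrapper = new AvroWrapper<Utf8>(null);
    NullWritable ignore = NullWritable.get();
    long records = 0;
    try {
      while (reader.next(wrapper, ignore)) {
        records++; // wrapper.datum() holds the current Utf8 value
      }
    } finally {
      reader.close();
    }
    System.out.println("records = " + records);
  }
}

Passing the JobConf as the fourth argument uses the same deprecated FileSplit constructor as the excerpts above; FileSplit(Path, long, long, String[]) with host location hints is the non-deprecated form.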


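A companion test reads a map-side output file (note the -m- part name) the same way; the excerpt ends just inside its read loop.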

    Schema readerSchema = Schema.create(Schema.Type.STRING);
    AvroJob.setInputSchema(job, readerSchema);
    String dir = System.getProperty("test.dir", ".") + "/mapred";
    Path inputPath = new Path(dir + "/out" + "/myavro2-m-00000.avro");
    FileStatus fileStatus = FileSystem.get(job).getFileStatus(inputPath);
    FileSplit fileSplit = new FileSplit(inputPath, 0, fileStatus.getLen(), job);
    AvroRecordReader<Utf8> recordReader_new = new AvroRecordReader<Utf8>(job, fileSplit);
    AvroWrapper<Utf8> inputPair_new = new AvroWrapper<Utf8>(null);
    NullWritable ignore = NullWritable.get();
    long testl = 0;
    while (recordReader_new.next(inputPair_new, ignore)) {
      // ...
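On the producing side, this excerpt, apparently from Hadoop's distcp tooling, walks a SequenceFile with a reader and cuts record-aligned FileSplits as it goes; the (String[]) null argument means no host location hints are supplied: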

        // Each call to next() leaves the reader at a record boundary;
        // cut a split every targetcount records at that position.
        for (in = new SequenceFile.Reader(fs, srcs, job); in.next(key, value);) {
          long curr = in.getPosition();
          long delta = curr - prev;
          if (++count > targetcount) {
            count = 0;
            splits.add(new FileSplit(srcs, prev, delta, (String[]) null));
            prev = curr;
          }
        }
      } finally {
        in.close();
      }
      // Whatever is left after the last cut becomes the final split.
      long remaining = fs.getFileStatus(srcs).getLen() - prev;
      if (remaining != 0) {
        splits.add(new FileSplit(srcs, prev, remaining, (String[]) null));
      }
      LOG.info("jobname= " + jobName + " numSplits=" + numSplits +
               ", splits.size()=" + splits.size());
      return splits.toArray(new FileSplit[splits.size()]);
    }
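A producer like this relies on consumers honoring split boundaries. Here is a minimal sketch of the consuming side, roughly the convention Hadoop's own SequenceFile record reader follows; the LongWritable/Text key and value types are assumptions for the sake of a runnable example:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileSplit;

public class SplitBoundedReader {
  // Count the records that fall inside one FileSplit of a SequenceFile.
  static long countRecords(FileSplit split, Configuration conf) throws IOException {
    Path path = split.getPath();
    FileSystem fs = path.getFileSystem(conf);
    long end = split.getStart() + split.getLength();
    SequenceFile.Reader in = new SequenceFile.Reader(fs, path, conf);
    long n = 0;
    try {
      if (split.getStart() > in.getPosition()) {
        // Jump to the first sync mark past the split start so reading
        // begins on a record boundary.
        in.sync(split.getStart());
      }
      LongWritable key = new LongWritable();
      Text value = new Text();
      // Read every record that starts before the split's end offset.
      while (in.getPosition() < end && in.next(key, value)) {
        n++;
      }
    } finally {
      in.close();
    }
    return n;
  }
}

The next two excerpts show Hive wrestling with the same boundary problem. First, a split that begins just past a sync marker could lose records, so for RCFile and SequenceFile inputs Hive widens the split backwards by one SYNC_INTERVAL: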

          // we may miss a sync here
          HiveInputSplit newSplit = split;
          if (split.inputFormatClassName().contains("RCFile")
              || split.inputFormatClassName().contains("SequenceFile")) {
            if (split.getStart() > SequenceFile.SYNC_INTERVAL) {
              newSplit = new HiveInputSplit(new FileSplit(split.getPath(),
                  split.getStart() - SequenceFile.SYNC_INTERVAL,
                  split.getLength() + SequenceFile.SYNC_INTERVAL,
                  split.getLocations()),
                  split.inputFormatClassName());
            }
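Second, on the read path, Hive's initIOContext works out where a split's data actually begins: for SequenceFile and RCFile inputs it syncs a reader to the split start and records the resulting position as the block start (an RCFileRecordReader already knows it):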

  public void initIOContext(FileSplit split, JobConf job,
      Class inputFormatClass, RecordReader recordReader) throws IOException {
    boolean blockPointer = false;
    long blockStart = -1;
    FileSplit fileSplit = (FileSplit) split;
    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem(job);
    if (inputFormatClass.getName().contains("SequenceFile")) {
      // Sync a throwaway reader to the split start; the position it lands
      // on is the first record boundary inside the split.
      SequenceFile.Reader in = new SequenceFile.Reader(fs, path, job);
      blockPointer = in.isBlockCompressed();
      in.sync(fileSplit.getStart());
      blockStart = in.getPosition();
      in.close();
    } else if (recordReader instanceof RCFileRecordReader) {
      blockPointer = true;
      blockStart = ((RCFileRecordReader) recordReader).getStart();
    } else if (inputFormatClass.getName().contains("RCFile")) {
      blockPointer = true;
      RCFile.Reader in = new RCFile.Reader(fs, path, job);
      in.sync(fileSplit.getStart());
      blockStart = in.getPosition();
      in.close();
    }
    this.initIOContext(blockStart, blockPointer, split.getPath().toString());
  }
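A consumer from Nutch's segment reader, by the look of SegmentPart: the generic InputSplit is cast to FileSplit so its path can identify the segment part and back a SequenceFile.Reader: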

      reporter.setStatus(split.toString());
     
      // find part name
      SegmentPart segmentPart;
      final String spString;
      final FileSplit fSplit = (FileSplit) split;
      try {
        segmentPart = SegmentPart.get(fSplit);
        spString = segmentPart.toString();
      } catch (IOException e) {
        throw new RuntimeException("Cannot identify segment:", e);
      }
     
      SequenceFile.Reader reader =
        new SequenceFile.Reader(FileSystem.get(job), fSplit.getPath(), job);
     
      final Writable w;
      try {
        w = (Writable) reader.getValueClass().newInstance();
      } catch (Exception e) {
        // ...
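Another split producer packs by byte size rather than record count: key.get() carries a length, the accumulator acc decides against targetsize where to cut, and the remaining bytes (cbrem) become the final split: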

        for (; sl.next(key, value); last = sl.getPosition()) {
          // if adding this split would put this split past the target size,
          // cut the last split and put this next file in the next split.
          if (acc + key.get() > targetsize && acc != 0) {
            long splitsize = last - pos;
            splits.add(new FileSplit(src, pos, splitsize, (String[])null));
            cbrem -= splitsize;
            pos = last;
            acc = 0L;
          }
          acc += key.get();
        }
      }
      finally {
        checkAndClose(sl);
      }
      if (cbrem != 0) {
        splits.add(new FileSplit(src, pos, cbrem, (String[])null));
      }

      return splits.toArray(new FileSplit[splits.size()]);
    }
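This excerpt, apparently from Hive's PTF row container, maps stored block offsets back onto splits: each BlockInfo records the index of the split that contains its start offset, so a later seek to a block knows which split to reopen: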

  public Row first() throws HiveException {
    Row r = super.first();

    if (blockInfos.size() > 0) {
      InputSplit[] inputSplits = getInputSplits();
      FileSplit fS = null;
      BlockInfo bI = blockInfos.get(0);
      bI.startingSplit = 0;
      int i = 1;
      bI = i < blockInfos.size() ? blockInfos.get(i) : null;
      // A block belongs to split j - 1 if it starts before split j does
      // (splits are ordered by start offset).
      for (int j = 1; j < inputSplits.length && bI != null; j++) {
        fS = (FileSplit) inputSplits[j];
        while (bI != null && bI.startOffset < fS.getStart()) {
          bI.startingSplit = j - 1;
          i++;
          bI = i < blockInfos.size() ? blockInfos.get(i) : null;
        }
      }
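Finally, in what looks like Hive's Tez vertex-management code, FileSplits are rebuilt from RootInputDataInformationEvents and grouped by the path they cover: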

        Preconditions.checkState(false);
      } else if (event instanceof RootInputDataInformationEvent) {
        dataInformationEventSeen = true;
        RootInputDataInformationEvent diEvent = (RootInputDataInformationEvent) event;
        dataInformationEvents.add(diEvent);
        FileSplit fileSplit;
        try {
          fileSplit = getFileSplitFromEvent(diEvent);
        } catch (IOException e) {
          // Chain the IOException as the cause instead of discarding it.
          throw new RuntimeException("Failed to get file split for event: " + diEvent, e);
        }
        // Group splits by the file they cover.
        List<FileSplit> fsList = pathFileSplitsMap.get(fileSplit.getPath());
        if (fsList == null) {
          fsList = new ArrayList<FileSplit>();
          pathFileSplitsMap.put(fileSplit.getPath(), fsList);
        }
        fsList.add(fileSplit);
      }
    }
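That grouping step can be isolated into a small helper. This is a hypothetical sketch; SplitGrouping and groupByPath are names invented here, not part of any of the projects above:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;

public class SplitGrouping {
  // Bucket splits by the file they cover, preserving encounter order
  // within each bucket; Path defines equals/hashCode, so it works as a key.
  static Map<Path, List<FileSplit>> groupByPath(List<FileSplit> splits) {
    Map<Path, List<FileSplit>> byPath = new HashMap<Path, List<FileSplit>>();
    for (FileSplit split : splits) {
      List<FileSplit> bucket = byPath.get(split.getPath());
      if (bucket == null) {
        bucket = new ArrayList<FileSplit>();
        byPath.put(split.getPath(), bucket);
      }
      bucket.add(split);
    }
    return byPath;
  }
}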
