Examples of org.apache.hadoop.mapred.FileSplit

org.apache.hadoop.mapred.FileSplit
A section of an input file. Returned by {@link InputFormat#getSplits(JobConf,int)} and passed to {@link InputFormat#getRecordReader(InputSplit,JobConf,Reporter)}.

           sl.next(key, value); last = sl.getPosition()) {
        // if adding this split would put this split past the target size,
        // cut the last split and put this next file in the next split.
        if (acc + key.get() > targetsize && acc != 0) {
          long splitsize = last - pos;
          splits.add(new FileSplit(src, pos, splitsize, job));
          cbrem -= splitsize;
          pos = last;
          acc = 0L;
        }
        acc += key.get();
      }
      if (cbrem != 0) {
        splits.add(new FileSplit(src, pos, cbrem, job));
      }


      return splits.toArray(new FileSplit[splits.size()]);
    }

View Full Code Here

    return split;
  }


  private void setIncrementalConfigParams(InputSplit inputSplit) {
    if (inputSplit instanceof FileSplit) {
      FileSplit fileSplit = (FileSplit) inputSplit;
      this.incrementalConf = new Configuration(false);


      this.incrementalConf.set(JobContext.MAP_INPUT_FILE, fileSplit.getPath()
          .toString());
      this.incrementalConf.setLong(JobContext.MAP_INPUT_START,
          fileSplit.getStart());
      this.incrementalConf.setLong(JobContext.MAP_INPUT_PATH,
          fileSplit.getLength());
    }
    LOG.info("Processing split: " + inputSplit);
  }

View Full Code Here

   * @param job the job configuration to update
   * @param inputSplit the file split
   */
  private void updateJobWithSplit(final JobConf job, InputSplit inputSplit) {
    if (inputSplit instanceof FileSplit) {
      FileSplit fileSplit = (FileSplit) inputSplit;
      job.set(JobContext.MAP_INPUT_FILE, fileSplit.getPath().toString());
      job.setLong(JobContext.MAP_INPUT_START, fileSplit.getStart());
      job.setLong(JobContext.MAP_INPUT_PATH, fileSplit.getLength());
    }
    LOG.info("Processing mapred split: " + inputSplit);
  }

View Full Code Here

        for (in = new SequenceFile.Reader(fs, srcs, job); in.next(key, value);) {
          long curr = in.getPosition();
          long delta = curr - prev;
          if (++count > targetcount) {
            count = 0;
            splits.add(new FileSplit(srcs, prev, delta, (String[]) null));
            prev = curr;
          }
        }
      } finally {
        in.close();
      }
      long remaining = fs.getFileStatus(srcs).getLen() - prev;
      if (remaining != 0) {
        splits.add(new FileSplit(srcs, prev, remaining, (String[]) null));
      }
      LOG.info("jobname= " + jobName + " numSplits=" + numSplits + 
               ", splits.size()=" + splits.size());
      return splits.toArray(new FileSplit[splits.size()]);
    }

View Full Code Here

    return split;
  }


  private void setIncrementalConfigParams(InputSplit inputSplit) {
    if (inputSplit instanceof FileSplit) {
      FileSplit fileSplit = (FileSplit) inputSplit;
      this.incrementalConf = new Configuration(false);


      this.incrementalConf.set(JobContext.MAP_INPUT_FILE, fileSplit.getPath()
          .toString());
      this.incrementalConf.setLong(JobContext.MAP_INPUT_START,
          fileSplit.getStart());
      this.incrementalConf.setLong(JobContext.MAP_INPUT_PATH,
          fileSplit.getLength());
    }
    LOG.info("Processing split: " + inputSplit);
  }

View Full Code Here

  private int dumpTypedBytes(List<FileStatus> files) throws IOException {
    JobConf job = new JobConf(getConf()); 
    DataOutputStream dout = new DataOutputStream(System.out);
    AutoInputFormat autoInputFormat = new AutoInputFormat();
    for (FileStatus fileStatus : files) {
      FileSplit split = new FileSplit(fileStatus.getPath(), 0,
        fileStatus.getLen() * fileStatus.getBlockSize(),
        (String[]) null);
      RecordReader recReader = null;
      try {
        recReader = autoInputFormat.getRecordReader(split, job, Reporter.NULL);

View Full Code Here

        for (; sl.next(key, value); last = sl.getPosition()) {
          // if adding this split would put this split past the target size,
          // cut the last split and put this next file in the next split.
          if (acc + key.get() > targetsize && acc != 0) {
            long splitsize = last - pos;
            splits.add(new FileSplit(src, pos, splitsize, (String[])null));
            cbrem -= splitsize;
            pos = last;
            acc = 0L;
          }
          acc += key.get();
        }
      }
      finally {
        checkAndClose(sl);
      }
      if (cbrem != 0) {
        splits.add(new FileSplit(src, pos, cbrem, (String[])null));
      }


      return splits.toArray(new FileSplit[splits.size()]);
    }

View Full Code Here

      try {
        reader = new SequenceFile.Reader(fs, src, jconf);
        while(reader.next(key, value)) {
          if (currentCount + key.get() > targetSize && currentCount != 0){
            long size = lastPos - startPos;
            splits.add(new FileSplit(src, startPos, size, (String[]) null));
            remaining = remaining - size;
            startPos = lastPos;
            currentCount = 0L;
          }
          currentCount += key.get();
          lastPos = reader.getPosition();
        }
        // the remaining not equal to the target size.
        if (remaining != 0) {
          splits.add(new FileSplit(src, startPos, remaining, (String[])null));
        }
      }
      finally { 
        reader.close();
      }

View Full Code Here

      return null;
    }
    Path p = new Path(path);
    long start = Long.parseLong(job.get(MRJobConfig.MAP_INPUT_START));
    long length = Long.parseLong(job.get(MRJobConfig.MAP_INPUT_PATH));
    return new FileSplit(p, start, length, job);
  }

View Full Code Here

      try {
        reader = new SequenceFile.Reader(fs, src, jconf);
        while(reader.next(key, value)) {
          if (currentCount + key.get() > targetSize && currentCount != 0){
            long size = lastPos - startPos;
            splits.add(new FileSplit(src, startPos, size, (String[]) null));
            remaining = remaining - size;
            startPos = lastPos;
            currentCount = 0L;
          }
          currentCount += key.get();
          lastPos = reader.getPosition();
        }
        // the remaining not equal to the target size.
        if (remaining != 0) {
          splits.add(new FileSplit(src, startPos, remaining, (String[])null));
        }
      }
      finally { 
        reader.close();
      }

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.hadoop.mapred.FileSplit

ar.ext.spark.hbin.HBINInputFormat

cascading.tap.hadoop.io.CombineFileRecordReaderWrapper

com.alexholmes.hadooputils.combine.common.mapred.CommonCombineRecordReader

com.cloudera.iterativereduce.io.TextRecordParser

com.ebay.erl.mobius.core.mapred.FileInputFormatHelper

com.esri.json.hadoop.TestUnenclosedJsonRecordReader

com.facebook.hive.orc.OrcInputFormat

com.facebook.presto.hive.AbstractTestHiveFileFormats

com.facebook.presto.hive.BenchmarkHiveFileFormats$BenchmarkFile

com.facebook.presto.hive.HiveRecordSet

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.