Package org.apache.hadoop.mapred

Examples of org.apache.hadoop.mapred.FileSplit
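
FileSplit describes the portion of an input file that a single map task processes: a path, a starting byte offset, a length, and the hosts on which that data is local. As a minimal sketch of direct construction (the path, size, and host names here are hypothetical):

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.FileSplit;

    // bytes [0, 128 MB) of a hypothetical input file, with a hint that
    // host1 and host2 hold the underlying block locally
    FileSplit split = new FileSplit(new Path("/data/input.seq"),
        0L, 128L * 1024 * 1024, new String[] { "host1", "host2" });

The fragments below, drawn from several projects, show the constructor variants and the accessors getPath(), getStart(), getLength(), and getLocations() in context.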


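A size-based splitting loop (this and the variants further down appear to come from Hadoop's distcp job setup): the keys of a SequenceFile listing carry file lengths, which accumulate in acc until the next file would push the split past targetsize; cbrem counts the bytes not yet assigned to a split: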
      for (; sl.next(key, value); last = sl.getPosition()) {
        // if adding this file would push the current split past the
        // target size, cut the split here and carry the file over into
        // the next split.
        if (acc + key.get() > targetsize && acc != 0) {
          long splitsize = last - pos;
          splits.add(new FileSplit(src, pos, splitsize, job));
          cbrem -= splitsize;
          pos = last;
          acc = 0L;
        }
        acc += key.get();
      }
      // whatever bytes remain become the final (possibly short) split
      if (cbrem != 0) {
        splits.add(new FileSplit(src, pos, cbrem, job));
      }

      return splits.toArray(new FileSplit[splits.size()]);
    }


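Hive's SymlinkTextInputFormat wraps a plain FileSplit inside its own split type; the no-argument constructor builds an empty shell to be populated during deserialization: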
  public static class SymlinkTextInputSplit extends FileSplit {
    private final FileSplit split;

    public SymlinkTextInputSplit() {
      // empty shell: the fields are filled in by readFields() when the
      // framework deserializes the split on the task side
      super((Path)null, 0, 0, (String[])null);
      split = new FileSplit((Path)null, 0, 0, (String[])null);
    }
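The null-argument construction works because FileSplit implements Writable: the framework creates an empty split on the task side and fills it in with readFields(). A minimal sketch of that round trip (the file name and size are hypothetical):

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.FileSplit;

    public class FileSplitRoundTrip {
      public static void main(String[] args) throws IOException {
        FileSplit sent = new FileSplit(new Path("/data/a.txt"), 0L, 1024L,
            (String[]) null);

        // serialize, as the framework does when shipping a split to a task
        ByteArrayOutputStream buf = new ByteArrayOutputStream();
        sent.write(new DataOutputStream(buf));

        // deserialize into an empty shell, mirroring the constructor above
        FileSplit received = new FileSplit((Path) null, 0, 0, (String[]) null);
        received.readFields(new DataInputStream(
            new ByteArrayInputStream(buf.toByteArray())));

        // path, start, and length now match the split that was sent
        System.out.println(received.getPath() + " " + received.getStart()
            + " " + received.getLength());
      }
    }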

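From Hive's split handling: for sync-delimited formats (RCFile, SequenceFile) a split that does not begin at the start of the file is widened backwards by one sync interval before being re-wrapped: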
          // the split may start just past a sync mark; pull the start back
          // by one SYNC_INTERVAL so no records between the split boundary
          // and the next sync are missed
          HiveInputSplit newSplit = split;
          if (split.inputFormatClassName().contains("RCFile")
              || split.inputFormatClassName().contains("SequenceFile")) {
            if (split.getStart() > SequenceFile.SYNC_INTERVAL) {
              newSplit = new HiveInputSplit(
                  new FileSplit(split.getPath(),
                      split.getStart() - SequenceFile.SYNC_INTERVAL,
                      split.getLength() + SequenceFile.SYNC_INTERVAL,
                      split.getLocations()),
                  split.inputFormatClassName());
            }

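An IndirectSplit merely names a file; to read it, the configured indirect input format (SequenceFileInputFormat by default) is handed a FileSplit covering that file from offset 0 to its full length: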
        Reporter reporter) throws IOException {
      // instantiate the configured indirect input format and point it
      // at the whole file the IndirectSplit names
      InputFormat indirIF = (InputFormat) ReflectionUtils.newInstance(
          job.getClass("mapred.indirect.input.format",
              SequenceFileInputFormat.class), job);
      IndirectSplit is = (IndirectSplit) split;
      return indirIF.getRecordReader(
          new FileSplit(is.getPath(), 0, is.getLength(), job),
          job, reporter);
    }

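Hive's ORC integration needs only the split's path, start, and length to construct its record reader: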
  @Override
  public RecordReader<NullWritable, OrcStruct>
      getRecordReader(InputSplit inputSplit, JobConf conf,
                      Reporter reporter) throws IOException {
    FileSplit fileSplit = (FileSplit) inputSplit;
    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem(conf);
    reporter.setStatus(fileSplit.toString());
    return new OrcRecordReader(OrcFile.createReader(fs, path), conf,
                               fileSplit.getStart(), fileSplit.getLength());
  }


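A variant of the splitting loop from the first example, passing (String[]) null for the split locations and closing the listing reader in a finally block: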
        for (; sl.next(key, value); last = sl.getPosition()) {
          // if adding this file would push the current split past the
          // target size, cut the split here and carry the file over
          // into the next split.
          if (acc + key.get() > targetsize && acc != 0) {
            long splitsize = last - pos;
            splits.add(new FileSplit(src, pos, splitsize, (String[])null));
            cbrem -= splitsize;
            pos = last;
            acc = 0L;
          }
          acc += key.get();
        }
      }
      finally {
        checkAndClose(sl);
      }
      // whatever bytes remain become the final (possibly short) split
      if (cbrem != 0) {
        splits.add(new FileSplit(src, pos, cbrem, (String[])null));
      }

      return splits.toArray(new FileSplit[splits.size()]);
    }

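The same accumulate-and-cut idea written as a while loop over a SequenceFile.Reader; lastPos trails the reader's position so that each split ends on a record boundary: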
      try {
        reader = new SequenceFile.Reader(fs, src, jconf);
        while(reader.next(key, value)) {
          if (currentCount + key.get() > targetSize && currentCount != 0){
            long size = lastPos - startPos;
            splits.add(new FileSplit(src, startPos, size, (String[]) null));
            remaining = remaining - size;
            startPos = lastPos;
            currentCount = 0L;
          }
          currentCount += key.get();
          lastPos = reader.getPosition();
        }
        // whatever bytes remain become the final split; it may be
        // smaller than the target size.
        if (remaining != 0) {
          splits.add(new FileSplit(src, startPos, remaining, (String[])null));
        }
      }
      finally {
        if (reader != null) {
          reader.close();
        }
      }

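For file-based splits, the old mapred framework publishes the current split's path, start, and length to the job configuration as map.input.file, map.input.start, and map.input.length. This fragment (which opens with the tail of a preceding null check) rebuilds the FileSplit from those properties; the path string is read a few lines above the excerpt: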
      return null;
    }
    Path p = new Path(path);
    long start = Long.parseLong(job.get("map.input.start"));
    long length = Long.parseLong(job.get("map.input.length"));
    return new FileSplit(p, start, length, job);
  }

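Inside a mapper, the split being processed is available from the Reporter; here it is cast to FileSplit to capture the input file's path once before tokenizing each line: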
      @Override
      public void map(LongWritable key, Text value,
          OutputCollector<BytesWritable, Tuple> output, Reporter reporter)
          throws IOException {
        if (filePath == null) {
          FileSplit split = (FileSplit) reporter.getInputSplit();
          filePath = split.getPath().toString();
        }
        String line = value.toString();
        StringTokenizer st = new StringTokenizer(line, " ");
        while (st.hasMoreTokens()) {
          byte[] word = st.nextToken().getBytes();
