Package org.apache.hadoop.mapred

Examples of org.apache.hadoop.mapred.FileSplit
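A FileSplit describes the slice of an input file handed to a single map task: a path, a starting byte offset, a length, and an optional array of hosts where the data is local. The examples below show the two common uses of the class: wrapping a whole file in a single split so a RecordReader can read it directly, and cutting large inputs into several splits. As a minimal sketch (the path here is a hypothetical placeholder, not taken from the examples):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.FileSplit;

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path file = new Path("/data/input.txt"); // hypothetical input file
    // A split covering the whole file; (String[]) null supplies no
    // data-locality (host) hints.
    FileSplit split = new FileSplit(file, 0, fs.getFileStatus(file).getLen(), (String[]) null);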


      // Iterate over the result paths, reading each file as key/value lines.
      for (Path path : result) {
        RecordReader<Text, Text> recordReader = null;
        try {
          // A FileSplit covering the whole file (offset 0 through its length).
          recordReader = new KeyValueLineRecordReader(job,
              new FileSplit(path, 0, fs.getFileStatus(path).getLen(), (String[]) null));
          Text key = new Text();
          Text value = new Text();
          while (recordReader.next(key, value)) {
            // get the cluster info
            // ...
          }
        } finally {
          if (recordReader != null) {
            recordReader.close();
          }
        }
      }


    // Scan each output file; convergence holds only while every value
    // read starts with 'V'.
    for (Path p : result) {
      KeyValueLineRecordReader reader = null;
      try {
        reader = new KeyValueLineRecordReader(conf,
            new FileSplit(p, 0, fs.getFileStatus(p).getLen(), (String[]) null));
        Text key = new Text();
        Text value = new Text();
        while (converged && reader.next(key, value)) {
          converged = value.toString().charAt(0) == 'V';
        }
      } finally {
        if (reader != null) {
          reader.close();
        }
      }
    }
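
Both excerpts above use the same idiom: a FileSplit spanning bytes 0 through getLen() presents the whole file as one split, which lets a KeyValueLineRecordReader read a finished output file directly, outside of a running map task.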

  private void setIncrementalConfigParams(InputSplit inputSplit) {
    if (inputSplit instanceof FileSplit) {
      FileSplit fileSplit = (FileSplit) inputSplit;
      this.incrementalConf = new Configuration(false);

      this.incrementalConf.set(JobContext.MAP_INPUT_FILE, fileSplit.getPath().toString());
      this.incrementalConf.setLong(JobContext.MAP_INPUT_START, fileSplit.getStart());
      // Despite its name, JobContext.MAP_INPUT_PATH is the configuration key
      // for the split length ("mapreduce.map.input.length").
      this.incrementalConf.setLong(JobContext.MAP_INPUT_PATH, fileSplit.getLength());
    }
    LOG.info("Processing split: " + inputSplit);
  }

  /**
   * Update the job with details about the file split.
   *
   * @param job the job configuration to update
   * @param inputSplit the file split
   */
  private void updateJobWithSplit(final JobConf job, InputSplit inputSplit) {
    if (inputSplit instanceof FileSplit) {
      FileSplit fileSplit = (FileSplit) inputSplit;
      job.set(JobContext.MAP_INPUT_FILE, fileSplit.getPath().toString());
      job.setLong(JobContext.MAP_INPUT_START, fileSplit.getStart());
      job.setLong(JobContext.MAP_INPUT_PATH, fileSplit.getLength()); // the length key, despite the name
    }
    LOG.info("Processing mapred split: " + inputSplit);
  }
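
Read back inside a task, the same constants recover the split parameters. A minimal sketch, assuming a JobConf populated as in updateJobWithSplit above:

    String file = job.get(JobContext.MAP_INPUT_FILE);         // "mapreduce.map.input.file"
    long start = job.getLong(JobContext.MAP_INPUT_START, 0L); // "mapreduce.map.input.start"
    long length = job.getLong(JobContext.MAP_INPUT_PATH, 0L); // "mapreduce.map.input.length"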

        // Walk the SequenceFile, cutting a new split every targetcount records;
        // getPosition() keeps the cuts on record boundaries.
        for (in = new SequenceFile.Reader(fs, srcs, job); in.next(key, value); ) {
          long curr = in.getPosition();
          long delta = curr - prev;
          if (++count > targetcount) {
            count = 0;
            splits.add(new FileSplit(srcs, prev, delta, (String[]) null));
            prev = curr;
          }
        }
      }
      finally {
        in.close();
      }
      // Whatever is left after the last cut becomes the final split.
      long remaining = fs.getFileStatus(srcs).getLen() - prev;
      if (remaining != 0) {
        splits.add(new FileSplit(srcs, prev, remaining, (String[]) null));
      }
      LOG.info("numSplits=" + numSplits + ", splits.size()=" + splits.size());
      return splits.toArray(new FileSplit[splits.size()]);
    }

        // Accumulate sizes (carried in the key) until the target split size
        // would be exceeded, then cut the split at the last record boundary.
        for (; sl.next(key, value); last = sl.getPosition()) {
          // If adding this file would put the split past the target size,
          // cut the split here and start the next one with this file.
          if (acc + key.get() > targetsize && acc != 0) {
            long splitsize = last - pos;
            splits.add(new FileSplit(src, pos, splitsize, (String[]) null));
            cbrem -= splitsize;
            pos = last;
            acc = 0L;
          }
          acc += key.get();
        }
      }
      finally {
        checkAndClose(sl);
      }
      // Any remaining bytes form the final split.
      if (cbrem != 0) {
        splits.add(new FileSplit(src, pos, cbrem, (String[]) null));
      }

      return splits.toArray(new FileSplit[splits.size()]);
    }

      try {
        reader = new SequenceFile.Reader(fs, src, jconf);
        while (reader.next(key, value)) {
          // Cut a split once the accumulated size would exceed the target.
          if (currentCount + key.get() > targetSize && currentCount != 0) {
            long size = lastPos - startPos;
            splits.add(new FileSplit(src, startPos, size, (String[]) null));
            remaining = remaining - size;
            startPos = lastPos;
            currentCount = 0L;
          }
          currentCount += key.get();
          lastPos = reader.getPosition();
        }
        // The remainder, smaller than the target size, becomes the last split.
        if (remaining != 0) {
          splits.add(new FileSplit(src, startPos, remaining, (String[]) null));
        }
      }
      finally {
        reader.close();
      }
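
The three excerpts above are variants of one pattern: walk a SequenceFile whose keys carry sizes, and cut a FileSplit whenever the accumulated size would pass a target, keeping every cut on a record boundary via getPosition(). A condensed sketch of that pattern follows; the names fs, src, conf, and targetSize are placeholders, not taken from any single excerpt:

    List<FileSplit> splits = new ArrayList<FileSplit>();
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, src, conf);
    LongWritable key = new LongWritable(); // per-record size
    Text value = new Text();
    long pos = 0L, last = 0L, acc = 0L;
    try {
      while (reader.next(key, value)) {
        // Cut at the previous record boundary once the target would be passed.
        if (acc + key.get() > targetSize && acc != 0) {
          splits.add(new FileSplit(src, pos, last - pos, (String[]) null));
          pos = last;
          acc = 0L;
        }
        acc += key.get();
        last = reader.getPosition();
      }
    } finally {
      reader.close();
    }
    // The tail of the file becomes the final split.
    long remaining = fs.getFileStatus(src).getLen() - pos;
    if (remaining != 0) {
      splits.add(new FileSplit(src, pos, remaining, (String[]) null));
    }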

  // Build a per-split name for a task's side-effect file: the bare leaf name
  // for the first split of a file, otherwise a FileSplit-derived name that
  // encodes the split's start and length.
  String getSideEffectFileName() {
    FileSplit split = StreamUtil.getCurrentSplit(job_);
    String leaf = split.getPath().getName();
    if (split.getStart() == 0) {
      return leaf;
    } else {
      return new FileSplit(new Path(leaf), split.getStart(), split.getLength()).toString();
    }
  }

  // Reconstruct the FileSplit the current task is processing from the
  // per-task configuration values.
  public static FileSplit getCurrentSplit(JobConf job) {
    String path = job.get("map.input.file");
    if (path == null) {
      return null;
    }
    Path p = new Path(path);
    long start = Long.parseLong(job.get("map.input.start"));
    long length = Long.parseLong(job.get("map.input.length"));
    return new FileSplit(p, start, length);
  }
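
The "map.input.file", "map.input.start", and "map.input.length" keys read here are the old (pre-2.x) names of the mapreduce.map.input.* properties that updateJobWithSplit sets above; Hadoop's configuration deprecation table maps the old names onto the new ones.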

