Package org.apache.hadoop.mapred

Examples of org.apache.hadoop.mapred.FileSplit
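
A hedged, minimal sketch (not taken from the examples below) of what a FileSplit represents: a contiguous byte range of a single file that is handed to one map task. The path, sizes and host names here are illustrative only.

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.FileSplit;

    public class FileSplitSketch {
      public static void main(String[] args) {
        Path path = new Path("/tmp/input.txt");        // hypothetical input file
        long start = 0L;                               // byte offset where the split begins
        long length = 64L * 1024 * 1024;               // number of bytes covered by the split
        String[] hosts = { "node1", "node2" };         // preferred hosts; may be null

        FileSplit split = new FileSplit(path, start, length, hosts);

        System.out.println(split.getPath());           // file the split belongs to
        System.out.println(split.getStart());          // first byte of the split
        System.out.println(split.getLength());         // length of the split in bytes
      }
    }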


    // Sum the lengths of the FileSplits in each bucket to get per-bucket input sizes.
    for (int bucketId : bucketSplitMap.keySet()) {
      Long size = 0L;
      Collection<InputSplit> inputSplitCollection = bucketSplitMap.get(bucketId);
      Iterator<InputSplit> iter = inputSplitCollection.iterator();
      while (iter.hasNext()) {
        FileSplit fsplit = (FileSplit)iter.next();
        size += fsplit.getLength();
        totalSize += fsplit.getLength();
      }
      bucketSizeMap.put(bucketId, size);
    }

    int totalResource = context.getTotalAvailableResource().getMemory();


      // Walk the SequenceFile and cut a new FileSplit each time the running total
      // of key values would exceed the target split size.
      try {
        reader = new SequenceFile.Reader(fs, src, jconf);
        while(reader.next(key, value)) {
          if (currentCount + key.get() > targetSize && currentCount != 0){
            long size = lastPos - startPos;
            splits.add(new FileSplit(src, startPos, size, (String[]) null));
            remaining = remaining - size;
            startPos = lastPos;
            currentCount = 0L;
          }
          currentCount += key.get();
          lastPos = reader.getPosition();
        }
        // Add a final split for whatever remains, even if it is smaller than the target size.
        if (remaining != 0) {
          splits.add(new FileSplit(src, startPos, remaining, (String[])null));
        }
      }
      finally {
        reader.close();
      }

  @Override
  public RecordReader<AvroWrapper<T>, NullWritable>
    getRecordReader(InputSplit split, final JobConf job,
                    Reporter reporter) throws IOException {
    final FileSplit file = (FileSplit)split;
    reporter.setStatus(file.toString());

    final AvroColumnReader.Params params =
      new AvroColumnReader.Params(new HadoopInput(file.getPath(), job));
    params.setModel(ReflectData.get());
    if (job.get(AvroJob.INPUT_SCHEMA) != null)
      params.setSchema(AvroJob.getInputSchema(job));

    return new RecordReader<AvroWrapper<T>, NullWritable>() {

      throw new IOException ("CombineHiveRecordReader: class not found " + inputFormatClassName);
    }
    InputFormat inputFormat = CombineHiveInputFormat.getInputFormatFromCache(inputFormatClass, job);
   
    // create a split for the given partition
    FileSplit fsplit = new FileSplit(hsplit.getPaths()[partition],
                                     hsplit.getStartOffsets()[partition],
                                     hsplit.getLengths()[partition],
                                     hsplit.getLocations());
   
    this.recordReader = inputFormat.getRecordReader(fsplit, job, reporter);

      reporter.setStatus(split.toString());
     
      // find part name
      SegmentPart segmentPart;
      final String spString;
      final FileSplit fSplit = (FileSplit) split;
      try {
        segmentPart = SegmentPart.get(fSplit);
        spString = segmentPart.toString();
      } catch (IOException e) {
        throw new RuntimeException("Cannot identify segment:", e);
      }
     
      final SequenceFile.Reader reader =
        new SequenceFile.Reader(FileSystem.get(job), fSplit.getPath(), job);
     
      final Writable w;
      try {
        w = (Writable) reader.getValueClass().newInstance();
      } catch (Exception e) {

      @Override
      public void map(LongWritable key, Text value,
          OutputCollector<BytesWritable, Tuple> output, Reporter reporter)
          throws IOException {
        if (filePath == null) {
          FileSplit split = (FileSplit) reporter.getInputSplit();
          filePath = split.getPath().toString();
        }
        String line = value.toString();
        StringTokenizer st = new StringTokenizer(line, " ");
        while (st.hasMoreElements()) {
          byte[] word = st.nextToken().getBytes();

      return sideFs_.create(new Path(uri.getSchemeSpecificPart()));
    }
  }

  String getSideEffectFileName() {
    FileSplit split = StreamUtil.getCurrentSplit(job_);
    return split.getPath().getName() + "-" + split.getStart() +
            "-" + split.getLength();
  }


      } catch (Throwable e) {
          numMapsConfig = "mapred.map.tasks";
      }
      int numSplits = job.getInt(numMapsConfig, 1);
      // Create one dummy FileSplit per requested map task; the paths are placeholders.
      for (int i = 0; i < numSplits; ++i) {
        result.add(new FileSplit(new Path(outDir, "dummy-split-" + i), 0, 1,
                                 (String[]) null));
      }
      InputSplit[] rval = {result.get(0)};
      rval = result.toArray(rval);
      return rval;

        }

        // Build a single FileSplit covering the entire local file.
        Path path = new Path(filePath);
        path.getFileSystem(new Configuration()).setVerifyChecksum(true);
        File file = new File(filePath);
        return new FileSplit(path, 0, file.length(), new String[0]);
    }
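
A hedged follow-up sketch (not from the sources above): assuming the file behind a split is plain text, the split can be consumed with the old-API LineRecordReader, which yields byte offsets as keys and lines as values. The file name is illustrative only.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.FileSplit;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.LineRecordReader;

    public class ReadSplitSketch {
      public static void main(String[] args) throws Exception {
        JobConf job = new JobConf(new Configuration());
        Path path = new Path("/tmp/input.txt");                 // hypothetical text file
        long length = path.getFileSystem(job).getFileStatus(path).getLen();

        // One split covering the whole file; no preferred hosts.
        FileSplit split = new FileSplit(path, 0, length, (String[]) null);
        LineRecordReader reader = new LineRecordReader(job, split);

        LongWritable key = reader.createKey();
        Text value = reader.createValue();
        try {
          while (reader.next(key, value)) {
            System.out.println(key.get() + "\t" + value);       // byte offset and line text
          }
        } finally {
          reader.close();
        }
      }
    }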
