Package org.apache.hadoop.mapred

Examples of org.apache.hadoop.mapred.FileSplit
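
A hedged, minimal sketch (not taken from the examples below) of what a FileSplit represents: a contiguous byte range of a single file that is handed to one map task. The path, sizes and host names here are illustrative only.

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.FileSplit;

    public class FileSplitSketch {
      public static void main(String[] args) {
        Path path = new Path("/tmp/input.txt");        // hypothetical input file
        long start = 0L;                               // byte offset where the split begins
        long length = 64L * 1024 * 1024;               // number of bytes covered by the split
        String[] hosts = { "node1", "node2" };         // preferred hosts; may be null

        FileSplit split = new FileSplit(path, start, length, hosts);

        System.out.println(split.getPath());           // file the split belongs to
        System.out.println(split.getStart());          // first byte of the split
        System.out.println(split.getLength());         // length of the split in bytes
      }
    }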


    // Sum the lengths of the FileSplits in each bucket to get per-bucket input sizes.
    for (int bucketId : bucketSplitMap.keySet()) {
      Long size = 0L;
      Collection<InputSplit> inputSplitCollection = bucketSplitMap.get(bucketId);
      Iterator<InputSplit> iter = inputSplitCollection.iterator();
      while (iter.hasNext()) {
        FileSplit fsplit = (FileSplit)iter.next();
        size += fsplit.getLength();
        totalSize += fsplit.getLength();
      }
      bucketSizeMap.put(bucketId, size);
    }

    int totalResource = context.getTotalAvailableResource().getMemory();


      // Walk the SequenceFile and cut a new FileSplit each time the running total
      // of key values would exceed the target split size.
      try {
        reader = new SequenceFile.Reader(fs, src, jconf);
        while(reader.next(key, value)) {
          if (currentCount + key.get() > targetSize && currentCount != 0){
            long size = lastPos - startPos;
            splits.add(new FileSplit(src, startPos, size, (String[]) null));
            remaining = remaining - size;
            startPos = lastPos;
            currentCount = 0L;
          }
          currentCount += key.get();
          lastPos = reader.getPosition();
        }
        // Add a final split for whatever remains, even if it is smaller than the target size.
        if (remaining != 0) {
          splits.add(new FileSplit(src, startPos, remaining, (String[])null));
        }
      }
      finally {
        reader.close();
      }

  @Override
  public RecordReader<AvroWrapper<T>, NullWritable>
    getRecordReader(InputSplit split, final JobConf job,
                    Reporter reporter) throws IOException {
    final FileSplit file = (FileSplit)split;
    reporter.setStatus(file.toString());

    final AvroColumnReader.Params params =
      new AvroColumnReader.Params(new HadoopInput(file.getPath(), job));
    params.setModel(ReflectData.get());
    if (job.get(AvroJob.INPUT_SCHEMA) != null)
      params.setSchema(AvroJob.getInputSchema(job));

    return new RecordReader<AvroWrapper<T>, NullWritable>() {

      throw new IOException ("CombineHiveRecordReader: class not found " + inputFormatClassName);
    }
    InputFormat inputFormat = CombineHiveInputFormat.getInputFormatFromCache(inputFormatClass, job);
   
    // create a split for the given partition
    FileSplit fsplit = new FileSplit(hsplit.getPaths()[partition],
                                     hsplit.getStartOffsets()[partition],
                                     hsplit.getLengths()[partition],
                                     hsplit.getLocations());
   
    this.recordReader = inputFormat.getRecordReader(fsplit, job, reporter);

      reporter.setStatus(split.toString());
     
      // find part name
      SegmentPart segmentPart;
      final String spString;
      final FileSplit fSplit = (FileSplit) split;
      try {
        segmentPart = SegmentPart.get(fSplit);
        spString = segmentPart.toString();
      } catch (IOException e) {
        throw new RuntimeException("Cannot identify segment:", e);
      }
     
      final SequenceFile.Reader reader =
        new SequenceFile.Reader(FileSystem.get(job), fSplit.getPath(), job);
     
      final Writable w;
      try {
        w = (Writable) reader.getValueClass().newInstance();
      } catch (Exception e) {

      @Override
      public void map(LongWritable key, Text value,
          OutputCollector<BytesWritable, Tuple> output, Reporter reporter)
          throws IOException {
        if (filePath == null) {
          FileSplit split = (FileSplit) reporter.getInputSplit();
          filePath = split.getPath().toString();
        }
        String line = value.toString();
        StringTokenizer st = new StringTokenizer(line, " ");
        while (st.hasMoreElements()) {
          byte[] word = st.nextToken().getBytes();

      return sideFs_.create(new Path(uri.getSchemeSpecificPart()));
    }
  }

  String getSideEffectFileName() {
    FileSplit split = StreamUtil.getCurrentSplit(job_);
    return split.getPath().getName() + "-" + split.getStart() +
            "-" + split.getLength();
  }


      } catch (Throwable e) {
          numMapsConfig = "mapred.map.tasks";
      }
      int numSplits = job.getInt(numMapsConfig, 1);
      // Create one dummy FileSplit per requested map task; the paths are placeholders.
      for (int i = 0; i < numSplits; ++i) {
        result.add(new FileSplit(new Path(outDir, "dummy-split-" + i), 0, 1,
                                 (String[]) null));
      }
      InputSplit[] rval = {result.get(0)};
      rval = result.toArray(rval);
      return rval;

        }

        // Build a single FileSplit covering the entire local file.
        Path path = new Path(filePath);
        path.getFileSystem(new Configuration()).setVerifyChecksum(true);
        File file = new File(filePath);
        return new FileSplit(path, 0, file.length(), new String[0]);
    }
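
A hedged follow-up sketch (not from the sources above): assuming the file behind a split is plain text, the split can be consumed with the old-API LineRecordReader, which yields byte offsets as keys and lines as values. The file name is illustrative only.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.FileSplit;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.LineRecordReader;

    public class ReadSplitSketch {
      public static void main(String[] args) throws Exception {
        JobConf job = new JobConf(new Configuration());
        Path path = new Path("/tmp/input.txt");                 // hypothetical text file
        long length = path.getFileSystem(job).getFileStatus(path).getLen();

        // One split covering the whole file; no preferred hosts.
        FileSplit split = new FileSplit(path, 0, length, (String[]) null);
        LineRecordReader reader = new LineRecordReader(job, split);

        LongWritable key = reader.createKey();
        Text value = reader.createValue();
        try {
          while (reader.next(key, value)) {
            System.out.println(key.get() + "\t" + value);       // byte offset and line text
          }
        } finally {
          reader.close();
        }
      }
    }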
