Package org.apache.hadoop.mapreduce.lib.input

Examples of org.apache.hadoop.mapreduce.lib.input.FileSplit
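
A FileSplit describes the portion of an input file that a single map task processes: a file path, a starting byte offset, a length in bytes, and the hosts that hold a local copy of the data. The excerpts below show the class being produced by InputFormat implementations, consumed by mappers and record readers, and mocked in tests.

As a minimal sketch before the real-world excerpts (the path, sizes, and host names here are purely illustrative):

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.lib.input.FileSplit;

    // A split covering the first 64 MB of a file, with two preferred hosts.
    FileSplit split = new FileSplit(
        new Path("/data/input.txt"), 0L, 64L * 1024 * 1024,
        new String[] { "host1", "host2" });

    split.getPath();     // the file backing this split
    split.getStart();    // byte offset where the split begins
    split.getLength();   // number of bytes this split covers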


       // Excerpt: partitioning a combined array of FileSplits back into
       // per-table ranges. inputSplits, realReaders, inputSplitBoundaries,
       // helper, conf, minSize, totalBytes, start, and prevStart are declared
       // earlier in the enclosing method.
       int tableIndex = 0, fileNumber = 0;
       Integer[] fileNumbers = helper.getFileNumbers();
       if (fileNumbers.length != realReaders.size())
         throw new IOException("Number of tables in input paths of input splits is incorrect.");
       // Splits are ordered file by file within each table, so a start offset
       // that fails to increase marks the first split of a new file.
       for (int j = 0; j < inputSplits.length; j++) {
         FileSplit fileSplit = (FileSplit) inputSplits[j];
         start = fileSplit.getStart();
         if (start <= prevStart) {
           fileNumber++;
           if (fileNumber >= fileNumbers[tableIndex]) {
             inputSplitBoundaries[tableIndex++] = j;
             fileNumber = 0;
           }
         }
         prevStart = start;
       }
       inputSplitBoundaries[tableIndex++] = inputSplits.length;
       if (tableIndex != realReaders.size())
         throw new IOException("Number of tables in input splits is incorrect.");
       for (tableIndex = 0; tableIndex < realReaders.size(); tableIndex++) {
         int startSplitIndex = (tableIndex == 0 ? 0 : inputSplitBoundaries[tableIndex - 1]);
         int splitLen = (tableIndex == 0 ? inputSplitBoundaries[0]
             : inputSplitBoundaries[tableIndex] - inputSplitBoundaries[tableIndex - 1]);
         BasicTable.Reader reader = realReaders.get(tableIndex);
         // Get the index of the column group that will be used for row-split.
         int splitCGIndex = reader.getRowSplitCGIndex();

         long[] starts = new long[splitLen];
         long[] lengths = new long[splitLen];
         int[] batches = new int[splitLen + 1];
         batches[0] = 0;
         int numBatches = 0;
         Path[] paths = new Path[splitLen];
         long totalLen = 0;
         // Like FileInputFormat, tolerate splits up to 10% over the target size.
         final double SPLIT_SLOP = 1.1;
         int endSplitIndex = startSplitIndex + splitLen;
         for (int j = startSplitIndex; j < endSplitIndex; j++) {
           FileSplit fileSplit = (FileSplit) inputSplits[j];
           Path p = fileSplit.getPath();
           long blockSize = p.getFileSystem(conf).getBlockSize(p);
           long splitSize = (long) (helper.computeSplitSize(blockSize, minSize, totalBytes) * SPLIT_SLOP);
           start = fileSplit.getStart();
           long length = fileSplit.getLength();
           int index = j - startSplitIndex;
           starts[index] = start;
           lengths[index] = length;
           totalLen += length;
           paths[index] = p;
           // ... (excerpt truncated here)
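
helper.computeSplitSize(...) above is project-specific (note that its third argument is a total byte count). For comparison, stock FileInputFormat derives its target split size only from the block size and the configured minimum and maximum:

    // Hadoop's built-in sizing in org.apache.hadoop.mapreduce.lib.input.FileInputFormat:
    protected long computeSplitSize(long blockSize, long minSize, long maxSize) {
      return Math.max(minSize, Math.min(maxSize, blockSize));
    }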


    // Mapper setup for a join: parsing helpers are built from the configured
    // separator, and this task's join order is looked up from the job
    // configuration under the name of the file the task is reading.
    protected void setup(Context context) throws IOException, InterruptedException {
        keyIndex = Integer.parseInt(context.getConfiguration().get("keyIndex"));
        String separator = context.getConfiguration().get("separator");
        splitter = Splitter.on(separator).trimResults();   // Guava Splitter
        joiner = Joiner.on(separator);                     // Guava Joiner
        FileSplit fileSplit = (FileSplit) context.getInputSplit();
        joinOrder = Integer.parseInt(context.getConfiguration().get(fileSplit.getPath().getName()));
    }
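
The setup() above shows the standard way for a map task to learn which input file it is reading: cast context.getInputSplit() to FileSplit and inspect the path. A self-contained sketch of the same pattern (the class and field names here are hypothetical, not from the project above):

    import java.io.IOException;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.lib.input.FileSplit;

    public class SourceTaggingMapper extends Mapper<LongWritable, Text, Text, Text> {
      private String sourceFile;

      @Override
      protected void setup(Context context) {
        // The cast is safe only with file-based input formats that actually
        // produce FileSplits (it fails for, e.g., CombineFileInputFormat).
        FileSplit split = (FileSplit) context.getInputSplit();
        sourceFile = split.getPath().getName();
      }

      @Override
      protected void map(LongWritable key, Text value, Context context)
          throws IOException, InterruptedException {
        // Tag every record with the name of the file it came from.
        context.write(new Text(sourceFile), value);
      }
    }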

        data.set(valuesWithOutKey);
        context.write(taggedKey, data);
    }

    // Per-file settings live in the configuration under the input file's
    // name; mapSplitter parses the stored string into a key/value map.
    private Map<String, String> getConfigurationMap(Context context) {
        FileSplit fileSplit = (FileSplit) context.getInputSplit();
        String configString = context.getConfiguration().get(fileSplit.getPath().getName());
        return mapSplitter.split(configString);
    }
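
mapSplitter above is presumably a Guava Splitter.MapSplitter; one plausible construction (the separator characters are illustrative):

    import java.util.Map;
    import com.google.common.base.Splitter;

    // Parses "file1.txt=1,file2.txt=2" into {file1.txt=1, file2.txt=2}.
    Splitter.MapSplitter mapSplitter = Splitter.on(',').withKeyValueSeparator('=');
    Map<String, String> config = mapSplitter.split("file1.txt=1,file2.txt=2");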

            LongWritable key = new LongWritable();
            Text value = new Text();
            try {
              // Walk the SequenceFile and cut a split after every filesPerTask
              // records, using the reader's byte position as the boundary.
              while (inFileReader.next(key, value)) {
                if (counter % filesPerTask == filesPerTask - 1L) {
                  splits.add(new FileSplit(inFile, startPos,
                      inFileReader.getPosition() - startPos, null));
                  startPos = inFileReader.getPosition();
                }
                counter++;
              }

              // Create a split for the remaining records if necessary; this
              // also covers the case where the loop created no splits at all.
              if (startPos != inFileReader.getPosition()) {
                splits.add(new FileSplit(inFile, startPos,
                    inFileReader.getPosition() - startPos, null));
              }
            } finally {
              inFileReader.close();
            }
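
Splits produced this way are cut at record-count boundaries rather than HDFS block boundaries. At the other extreme, getSplits() can emit exactly one FileSplit per input file; a minimal sketch, where WholeFileInputFormat is a hypothetical name:

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    import org.apache.hadoop.fs.BlockLocation;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.mapreduce.InputSplit;
    import org.apache.hadoop.mapreduce.JobContext;
    import org.apache.hadoop.mapreduce.lib.input.FileSplit;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

    public class WholeFileInputFormat extends TextInputFormat {
      @Override
      public List<InputSplit> getSplits(JobContext job) throws IOException {
        List<InputSplit> splits = new ArrayList<InputSplit>();
        for (FileStatus file : listStatus(job)) {   // the job's input files
          BlockLocation[] blocks = file.getPath()
              .getFileSystem(job.getConfiguration())
              .getFileBlockLocations(file, 0, file.getLen());
          // Use the hosts of the first block as the locality hint.
          String[] hosts = blocks.length > 0 ? blocks[0].getHosts() : new String[0];
          splits.add(new FileSplit(file.getPath(), 0, file.getLen(), hosts));
        }
        return splits;
      }
    }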

        if (mapInputBytes < 0) {
          LOG.warn("InputBytes for task "+mapTask.getTaskID()+" is not defined.");
          mapInputBytes = 0;
        }
      
        // One synthetic split per map task found in the job trace.
        splitsList.add(new FileSplit(emptyPath, 0, mapInputBytes, hosts));
      }

      // If not all map tasks appear in the job trace, make up splits for the
      // missing ones.
      int totalMaps = job.getTotalMaps();
      if (totalMaps < splitsList.size()) {
        LOG.warn("TotalMaps for job " + job.getJobID()
            + " is less than the total number of map task descriptions ("
            + totalMaps + "<" + splitsList.size() + ").");
      }

      int avgHostPerSplit;
      if (splitsList.size() == 0) {
        avgHostPerSplit = 3;
      } else {
        avgHostPerSplit = totalHosts / splitsList.size();
        if (avgHostPerSplit == 0) {
          avgHostPerSplit = 3;
        }
      }

      for (int i = splitsList.size(); i < totalMaps; i++) {
        if (cluster == null) {
          splitsList.add(new FileSplit(emptyPath, 0, 0, new String[0]));
        } else {
          MachineNode[] mNodes = cluster.getRandomMachines(avgHostPerSplit,
                                                           random);
          String[] hosts = new String[mNodes.length];
          for (int j = 0; j < hosts.length; ++j) {
            hosts[j] = mNodes[j].getName();
          }
          // TODO: split sizes are set to 0 for now.
          splitsList.add(new FileSplit(emptyPath, 0, 0, hosts));
        }
      }

      splits = splitsList.toArray(new InputSplit[splitsList.size()]);
    }
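
The host arrays handed to these synthetic splits are only locality hints for the scheduler; they come back through getLocations() and need not correspond to real file blocks:

    // getLocations() declares IOException; values here are illustrative.
    FileSplit s = new FileSplit(new Path("/synthetic"), 0, 0, new String[] { "node1" });
    String[] hints = s.getLocations();   // ["node1"]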

    // Set up the job configuration.
    Configuration conf = new Configuration();

    // Create a mock input split for this record reader.
    FileSplit inputSplit = createMock(FileSplit.class);
    expect(inputSplit.getPath()).andReturn(new Path("/path/to/an/avro/file")).anyTimes();
    expect(inputSplit.getStart()).andReturn(0L).anyTimes();
    expect(inputSplit.getLength()).andReturn(avroFileInput.length()).anyTimes();

    // Create a mock task attempt context for this record reader.
    TaskAttemptContext context = createMock(TaskAttemptContext.class);
    expect(context.getConfiguration()).andReturn(conf).anyTimes();
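
Since FileSplit is a concrete class with a public constructor, such tests could equally build a real split instead of an EasyMock mock; a sketch of the equivalent setup:

    // Equivalent to the mocked split above, using a real instance.
    FileSplit inputSplit = new FileSplit(
        new Path("/path/to/an/avro/file"), 0L, avroFileInput.length(), null);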

    // Set up the job configuration.
    Job job = new Job();
    AvroJob.setInputKeySchema(job, Schema.create(Schema.Type.STRING));
    Configuration conf = job.getConfiguration();

    FileSplit inputSplit = createMock(FileSplit.class);
    TaskAttemptContext context = createMock(TaskAttemptContext.class);
    expect(context.getConfiguration()).andReturn(conf).anyTimes();

    replay(inputSplit);
    replay(context);

        long pos = 0;
        int n;
        try {
          // Cut one split per line: readLine() returns the number of bytes it
          // consumed, so each FileSplit covers exactly one line of the file.
          // getStoreDirHosts() (defined elsewhere) supplies the locality hint.
          while ((n = reader.readLine(key)) > 0) {
            String[] hosts = getStoreDirHosts(fs, path);
            splits.add(new FileSplit(path, pos, n, hosts));
            pos += n;
          }
        } finally {
          reader.close();
        }

  @Override
  public void initialize(InputSplit split, TaskAttemptContext context) throws IOException,
      InterruptedException {

    FileSplit fSplit = (FileSplit) split;
    Path path = fSplit.getPath();
    Configuration conf = context.getConfiguration();
    this.in = new RCFile.Reader(path.getFileSystem(conf), path, conf);
    this.end = fSplit.getStart() + fSplit.getLength();

    // A split rarely begins exactly on a record boundary: seek forward to the
    // first sync point at or after the split's start offset.
    if (fSplit.getStart() > in.getPosition()) {
      in.sync(fSplit.getStart());
    }

    this.start = in.getPosition();
    more = start < end;   // an empty or exhausted range means nothing to read
  }
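
initialize() above seeks to the first sync point at or after the split's start; the matching read loop then stops once the reader's position passes end, so adjacent tasks do not process the same rows twice. A sketch of that bounded loop (rowId and the exact stop condition are illustrative; Hive's real reader is slightly more involved):

    @Override
    public boolean nextKeyValue() throws IOException {
      if (!more) {
        return false;
      }
      // Stop before reading once the previous record carried us past the end
      // of this split; the next task's reader picks up from its own sync point.
      if (in.getPosition() >= end) {
        more = false;
        return false;
      }
      more = in.next(rowId);   // rowId: a LongWritable field (illustrative)
      return more;
    }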
