Package org.apache.hadoop.mapred

Examples of org.apache.hadoop.mapred.InputSplit

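Before the individual excerpts, here is a minimal, self-contained sketch of how the old-style org.apache.hadoop.mapred InputSplit API is normally driven: an InputFormat computes the splits, and each split is handed back to the same format to obtain a RecordReader. The input path is a placeholder and the whole block is illustrative, not taken from any of the projects excerpted below.

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

public class SplitWalkThrough {
  public static void main(String[] args) throws IOException {
    JobConf job = new JobConf();
    // Placeholder input path; point it at a real file or directory.
    FileInputFormat.setInputPaths(job, new Path("/tmp/input"));

    TextInputFormat format = new TextInputFormat();
    format.configure(job);

    // Hint of 1 split; the format is free to return more.
    InputSplit[] splits = format.getSplits(job, 1);
    for (InputSplit split : splits) {
      // Every mapred InputSplit exposes its byte length and preferred hosts.
      System.out.println(split + " length=" + split.getLength()
          + " locations=" + Arrays.toString(split.getLocations()));

      RecordReader<LongWritable, Text> reader =
          format.getRecordReader(split, job, Reporter.NULL);
      LongWritable key = reader.createKey();   // byte offset of the line
      Text value = reader.createValue();       // line contents
      while (reader.next(key, value)) {
        // process one record of this split
      }
      reader.close();
    }
  }
}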

      LOG.info("splitting: got =        " + splits.length);

      // we should have a single split as the length is comfortably smaller than
      // the block size
      Assert.assertEquals("We got more than one splits!", 1, splits.length);
      InputSplit split = splits[0];
      Assert.assertEquals("It should be TezGroupedSplit",
        TezGroupedSplit.class, split.getClass());

      // check the split
      BitSet bits = new BitSet(length);
      LOG.debug("split= " + split);
      RecordReader<LongWritable, Text> reader =
View Full Code Here
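The excerpt above stops at the RecordReader. In this style of test the loop that usually follows reads every record of the single grouped split, marks it in the BitSet, and then asserts that nothing was missed or read twice. The sketch below assumes that each record's text is its own integer index (that is what the bit positions track) and that format and job are the grouped input format and JobConf from the surrounding test; neither detail is shown in the excerpt.

      RecordReader<LongWritable, Text> reader =
          format.getRecordReader(split, job, Reporter.NULL);
      try {
        LongWritable key = reader.createKey();
        Text value = reader.createValue();
        while (reader.next(key, value)) {
          // assumed: the record text carries the record's index
          int index = Integer.parseInt(value.toString());
          Assert.assertFalse("Record seen in more than one split", bits.get(index));
          bits.set(index);
        }
      } finally {
        reader.close();
      }
      // every expected record was seen exactly once
      Assert.assertEquals(length, bits.cardinality());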


    format.setConf(job);
    format.setInputFormat(mockWrappedFormat);
   
    job.setLong(TezConfiguration.TEZ_AM_GROUPING_SPLIT_MAX_SIZE, 500 * 1000 * 1000L);
    job.setLong(TezConfiguration.TEZ_AM_GROUPING_SPLIT_MIN_SIZE, 50 * 1000 * 1000L);
    InputSplit mockSplit1 = mock(InputSplit.class);
    when(mockSplit1.getLength()).thenReturn(10 * 1000 * 1000L);
    when(mockSplit1.getLocations()).thenReturn(null);
    int numSplits = 100;
    InputSplit[] mockSplits = new InputSplit[numSplits];
    for (int i=0; i<numSplits; i++) {
      mockSplits[i] = mockSplit1;
    }
View Full Code Here
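A hedged continuation of this mock setup: the wrapped format would be stubbed to hand back the 100 identical 10 MB splits (1000 MB in total), so the 50 MB minimum group size caps the grouped-split count at 1000 / 50 = 20 and the 500 MB maximum forces at least 1000 / 500 = 2 groups. The Mockito stubbing and the loose assertion bounds below are assumptions about how such a test proceeds, not part of the excerpt.

    when(mockWrappedFormat.getSplits(any(JobConf.class), anyInt()))
        .thenReturn(mockSplits);

    // 100 splits * 10 MB = 1000 MB of input:
    //   min group size  50 MB -> at most  1000 / 50  = 20 grouped splits
    //   max group size 500 MB -> at least 1000 / 500 =  2 grouped splits
    InputSplit[] groupedSplits = format.getSplits(job, 1);
    Assert.assertTrue("too many grouped splits", groupedSplits.length <= 20);
    Assert.assertTrue("too few grouped splits", groupedSplits.length >= 2);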

    }

    Deserializer<InputSplit> deserializer = serializationFactory
        .getDeserializer(clazz);
    deserializer.open(splitProto.getSplitBytes().newInput());
    InputSplit inputSplit = deserializer.deserialize(null);
    deserializer.close();
    return inputSplit;
  }
View Full Code Here
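The Deserializer above comes from Hadoop's generic org.apache.hadoop.io.serializer layer. Because every org.apache.hadoop.mapred.InputSplit is also a Writable, the same round trip can be sketched with plain DataOutputBuffer/DataInputBuffer; the FileSplit values below are placeholders.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;

public class SplitRoundTrip {
  public static void main(String[] args) throws IOException {
    // Serialize: write() captures everything the split needs to be rebuilt.
    InputSplit original =
        new FileSplit(new Path("/tmp/data.txt"), 0L, 1024L, new String[] { "host1" });
    DataOutputBuffer out = new DataOutputBuffer();
    original.write(out);

    // Deserialize into a fresh instance of the same concrete class.
    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    FileSplit copy = new FileSplit(new Path("/"), 0L, 0L, (String[]) null);
    copy.readFields(in);

    System.out.println(copy.getPath() + " @ " + copy.getStart() + "+" + copy.getLength());
  }
}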

  public RecordReader getRecordReader(InputSplit split, JobConf job,
      Reporter reporter) throws IOException {

    HiveInputSplit hsplit = (HiveInputSplit) split;

    InputSplit inputSplit = hsplit.getInputSplit();
    String inputFormatClassName = null;
    Class inputFormatClass = null;
    try {
      inputFormatClassName = hsplit.inputFormatClassName();
      inputFormatClass = job.getClassByName(inputFormatClassName);
View Full Code Here
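After the class name is resolved, wrappers in this style typically instantiate the wrapped InputFormat and delegate to it with the inner (non-Hive) split. The sketch below replaces the caching and extra setup the real code performs with a plain ReflectionUtils.newInstance call, so it is an approximation rather than the actual Hive implementation.

    } catch (Exception e) {
      throw new IOException("cannot find class " + inputFormatClassName, e);
    }

    // Sketch: build the wrapped format and delegate, handing it the inner split.
    InputFormat wrappedFormat =
        (InputFormat) ReflectionUtils.newInstance(inputFormatClass, job);
    return wrappedFormat.getRecordReader(inputSplit, job, reporter);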

    HBaseTestingUtility.SeenRowTracker rowTracker =
      new HBaseTestingUtility.SeenRowTracker(startRow, stopRow);

    for (int i = 0; i < splits.length; i++) {
      // validate input split
      InputSplit split = splits[i];
      Assert.assertTrue(split instanceof TableSnapshotInputFormat.TableSnapshotRegionSplit);

      // validate record reader
      OutputCollector collector = mock(OutputCollector.class);
      Reporter reporter = mock(Reporter.class);
View Full Code Here
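A hedged sketch of how the per-split record-reader validation usually continues here: obtain a reader for the split from the snapshot input format, feed every row key into the SeenRowTracker, and let the tracker verify the [startRow, stopRow) range after all splits have been read. The variable tsif and the tracker's addRow/validate calls are assumptions about the surrounding test, not shown in the excerpt.

      // tsif is assumed to be the mapred TableSnapshotInputFormat under test
      RecordReader<ImmutableBytesWritable, Result> rr =
          tsif.getRecordReader(split, job, reporter);
      ImmutableBytesWritable key = rr.createKey();
      Result value = rr.createValue();
      while (rr.next(key, value)) {
        rowTracker.addRow(key.copyBytes());   // assumed SeenRowTracker method
      }
      rr.close();

      // after the loop over all splits: rowTracker.validate();  (assumed)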

     
      FileInputFormat.setInputPaths(job, path);
     
      ARCFileInputFormat inputFormat = new ARCFileInputFormat();
     
      InputSplit[] splits = inputFormat.getSplits(job, 0);
     
      for (InputSplit split : splits) {
        RecordReader<Text,BytesWritable> reader = inputFormat.getRecordReader(split, job, null);
        validateSplit(fs,split,fileList,reader);
      }
View Full Code Here
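validateSplit itself is not shown. A hedged sketch of what such a helper typically does with the RecordReader<Text, BytesWritable> it is given: drain every record of the split and check each one against the files the test wrote (the exact comparison against fileList is an assumption, so it is left as a comment).

      Text key = reader.createKey();               // ARC record key
      BytesWritable value = reader.createValue();  // raw record bytes
      int records = 0;
      while (reader.next(key, value)) {
        records++;
        // assumed: compare key/value against the entries the test put in fileList
      }
      reader.close();
      // at minimum, the split should not be empty
      Assert.assertTrue(records > 0);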

     
      FileInputFormat.setInputPaths(job, path);
     
      ARCFileItemInputFormat inputFormat = new ARCFileItemInputFormat();
     
      InputSplit[] splits = inputFormat.getSplits(job, 0);
     
      for (InputSplit split : splits) {
        RecordReader<Text,ArcFileItem> reader = inputFormat.getRecordReader(split, job, null);
        validateArcFileItemSplit(fs,split,fileList,reader);
      }
View Full Code Here

  {
    if (currSplitNum < iSplits.length)
    {
      if (rdr != null)
        rdr.close();
      InputSplit curriSplit = iSplits[currSplitNum++];
      rdr = (org.apache.hadoop.mapred.RecordReader<Writable, Writable>) iFmt
          .getRecordReader(curriSplit, jconf, Reporter.NULL);
      if (key == null)
      {
        key = rdr.createKey();
View Full Code Here
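This is the familiar 'advance to the next split' helper. A hedged sketch of the complete method is below; the method name, the value handling, and the boolean return are assumptions that mirror the key handling already visible in the excerpt, and the fields (iSplits, currSplitNum, rdr, iFmt, jconf, key, value) are the ones the excerpt uses.

  // Sketch: close the current reader, open one for the next split, lazily
  // create the reusable key/value pair, and report whether a split was left.
  private boolean moveToNextSplit() throws IOException
  {
    if (currSplitNum < iSplits.length)
    {
      if (rdr != null)
        rdr.close();
      InputSplit curriSplit = iSplits[currSplitNum++];
      rdr = (org.apache.hadoop.mapred.RecordReader<Writable, Writable>) iFmt
          .getRecordReader(curriSplit, jconf, Reporter.NULL);
      if (key == null)
        key = rdr.createKey();
      if (value == null)
        value = rdr.createValue();
      return true;
    }
    return false;
  }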

      LOG.info("splitting: got =        " + splits.length);

      // we should have a single split as the length is comfortably smaller than
      // the block size
      Assert.assertEquals("We got more than one splits!", 1, splits.length);
      InputSplit split = splits[0];
      Assert.assertEquals("It should be TezGroupedSplit",
        TezGroupedSplit.class, split.getClass());

      // check the split
      BitSet bits = new BitSet(length);
      LOG.debug("split= " + split);
      RecordReader<LongWritable, Text> reader =
View Full Code Here

    format.setConf(job);
    format.setInputFormat(mockWrappedFormat);
   
    job.setLong(TezConfiguration.TEZ_AM_GROUPING_SPLIT_MAX_SIZE, 500*1000*1000l);
    job.setLong(TezConfiguration.TEZ_AM_GROUPING_SPLIT_MIN_SIZE, 50*1000*1000l);
    InputSplit mockSplit1 = mock(InputSplit.class);
    when(mockSplit1.getLength()).thenReturn(10*1000*1000l);
    when(mockSplit1.getLocations()).thenReturn(null);
    int numSplits = 100;
    InputSplit[] mockSplits = new InputSplit[numSplits];
    for (int i=0; i<numSplits; i++) {
      mockSplits[i] = mockSplit1;
    }
View Full Code Here
