Package org.apache.hadoop.mapred

Examples of org.apache.hadoop.mapred.InputSplit
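In the old org.apache.hadoop.mapred API, an InputSplit describes one chunk of work handed to a single map task: it reports its byte length for scheduling, names the hosts where its data lives, and, being a Writable, knows how to serialize itself so the framework can ship it to the task. The snippets below show the interface from several angles: building record readers over splits, grouping splits, and serializing them. As a warm-up, a minimal custom split might look like the following sketch; the class and field names here are illustrative, not taken from any of the projects below.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.InputSplit;

// Illustrative split naming one shard of a hypothetical source.
public class ShardInputSplit implements InputSplit {
  private String shardId = "";
  private long length;

  public ShardInputSplit() {
    // no-arg constructor so the framework can instantiate the split
    // reflectively before calling readFields()
  }

  public ShardInputSplit(String shardId, long length) {
    this.shardId = shardId;
    this.length = length;
  }

  @Override
  public long getLength() throws IOException {
    return length; // used by the scheduler to sort and assign splits
  }

  @Override
  public String[] getLocations() throws IOException {
    return new String[0]; // empty: no locality preference
  }

  @Override
  public void write(DataOutput out) throws IOException {
    Text.writeString(out, shardId);
    out.writeLong(length);
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    shardId = Text.readString(in);
    length = in.readLong();
  }
}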


  public RecordReader getRecordReader(InputSplit split, JobConf job,
      Reporter reporter) throws IOException {

    HiveInputSplit hsplit = (HiveInputSplit) split;

    InputSplit inputSplit = hsplit.getInputSplit();
    String inputFormatClassName = null;
    Class inputFormatClass = null;
    try {
      inputFormatClassName = hsplit.inputFormatClassName();
      inputFormatClass = job.getClassByName(inputFormatClassName);
    } catch (Exception e) {
      // completion of the truncated snippet: surface the unresolvable class
      throw new IOException("cannot find class " + inputFormatClassName, e);
    }
    // ... construction of the wrapped RecordReader elided ...
  }
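The method above unwraps a HiveInputSplit to recover the real underlying input format before delegating to it. For context, here is the standard split-then-read loop of the old API that such methods plug into, as a minimal self-contained driver; the input path and the split-count hint are assumptions for illustration.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

public class SplitReadDriver {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf();
    FileInputFormat.setInputPaths(job, new Path("/tmp/input")); // assumed path

    TextInputFormat format = new TextInputFormat();
    format.configure(job);

    for (InputSplit split : format.getSplits(job, 2)) {         // hint: 2 splits
      RecordReader<LongWritable, Text> reader =
          format.getRecordReader(split, job, Reporter.NULL);
      LongWritable key = reader.createKey();
      Text value = reader.createValue();
      while (reader.next(key, value)) {
        // process one line; key is the byte offset, value the line text
      }
      reader.close();
    }
  }
}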

  @Override
  public RecordReader<LongWritable, Text> getRecordReader(
      InputSplit split, JobConf job, Reporter reporter) throws IOException {
    InputSplit targetSplit = ((SymlinkTextInputSplit)split).getTargetSplit();

    // The target data is in TextInputFormat.
    TextInputFormat inputFormat = new TextInputFormat();
    inputFormat.configure(job);
    RecordReader innerReader = null;
    // ... construction of the inner reader over targetSplit elided ...
  }

  public void testIteratorNotInSplitsCompensation() throws Exception {
    FileInputFormat.addInputPath(conf, new Path("unused"));
    InputSplit[] splits = inputformat.getSplits(conf, 0);

    assertEquals(1, splits.length);
    InputSplit split = splits[0];

    IteratorSetting is = new IteratorSetting(1, PrimitiveComparisonFilter.FILTER_PREFIX + 1,
        PrimitiveComparisonFilter.class);

    is.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, StringCompare.class.getName());
    // ... remainder of the test elided ...
  }

    MapWork work = populateMapWork(jobConf, inputName);
    Multimap<Integer, InputSplit> bucketSplitMultiMap =
        ArrayListMultimap.<Integer, InputSplit> create();

    int i = 0;
    InputSplit prevSplit = null;
    for (InputSplit s : splits) {
      // this is the bit where we make sure we don't group across partition
      // schema boundaries
      if (schemaEvolved(s, prevSplit, groupAcrossFiles, work)) {
        ++i;
      }
      // ... placement of s into bucketSplitMultiMap elided ...
    }
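The truncated loop above implements split grouping with a Guava multimap: splits accumulate in the same bucket until a partition-schema boundary is detected, at which point the bucket index advances. A self-contained sketch of the same idea follows; the samePartitionSchema predicate is a hypothetical stand-in for the schemaEvolved check in the snippet.

import java.util.List;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;

import org.apache.hadoop.mapred.InputSplit;

public class SplitGroupingSketch {

  // Hypothetical stand-in for the schemaEvolved(...) check above.
  static boolean samePartitionSchema(InputSplit a, InputSplit b) {
    return true; // real code would compare partition descriptors
  }

  static Multimap<Integer, InputSplit> group(List<InputSplit> splits) {
    Multimap<Integer, InputSplit> buckets = ArrayListMultimap.create();
    int bucket = 0;
    InputSplit prev = null;
    for (InputSplit s : splits) {
      if (prev != null && !samePartitionSchema(s, prev)) {
        bucket++; // never group across a schema boundary
      }
      buckets.put(bucket, s);
      prev = s;
    }
    return buckets;
  }
}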

    // (tail of a payload-building helper) wrap the serialized bytes into a UserPayload
    byte[] serialized = dob.getData();
    return UserPayload.create(ByteBuffer.wrap(serialized));
  }

  private FileSplit getFileSplitFromEvent(InputDataInformationEvent event) throws IOException {
    InputSplit inputSplit = null;
    if (event.getDeserializedUserPayload() != null) {
      inputSplit = (InputSplit) event.getDeserializedUserPayload();
    } else {
      MRSplitProto splitProto = MRSplitProto.parseFrom(ByteString.copyFrom(event.getUserPayload()));
      SerializationFactory serializationFactory = new SerializationFactory(new Configuration());
      inputSplit = MRInputHelpers.createOldFormatSplitFromUserPayload(splitProto, serializationFactory);
    }

    if (!(inputSplit instanceof FileSplit)) {
      throw new UnsupportedOperationException(
          "Cannot handle splits other than FileSplit for the moment. Current input split type: "
              + inputSplit.getClass().getSimpleName());
    }
    return (FileSplit) inputSplit;
  }
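Both branches above end up materializing an old-API split from bytes, which works because every InputSplit honors the Writable contract. A minimal round trip through in-memory buffers shows the mechanics; the path and sizes below are placeholders.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.mapred.FileSplit;

public class SplitRoundTrip {
  public static void main(String[] args) throws Exception {
    FileSplit original =
        new FileSplit(new Path("/tmp/part-00000"), 0L, 1024L, (String[]) null);

    DataOutputBuffer out = new DataOutputBuffer();
    original.write(out);                      // serialize via Writable

    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());

    FileSplit copy = new FileSplit((Path) null, 0L, 0L, (String[]) null);
    copy.readFields(in);                      // rebuild from the raw bytes

    System.out.println(copy.getPath() + " @ " + copy.getStart() + "+" + copy.getLength());
  }
}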

      // skip directories
      if (f.isDir()) {
        continue;
      }

      @SuppressWarnings("deprecation")
      InputSplit split = new FileSplit(f.getPath(), 0, f.getLen(),
          new JobConf(conf));
      reader = inputFormat.getRecordReader(split, confQ, Reporter.NULL);
      Writable key = null;
      Text textValue = new Text();
      // ... record-by-record read loop over the file elided ...

          boolean statsAvailable = false;
          for(FileStatus file: fileList) {
            if (!file.isDir()) {
              InputFormat<?, ?> inputFormat = (InputFormat<?, ?>) ReflectionUtils.newInstance(
                  table.getInputFormatClass(), jc);
              InputSplit dummySplit = new FileSplit(file.getPath(), 0, 0, new String[] { table
                  .getDataLocation().toString() });
              org.apache.hadoop.mapred.RecordReader<?, ?> recordReader = (org.apache.hadoop.mapred.RecordReader<?, ?>) inputFormat
                  .getRecordReader(dummySplit, jc, Reporter.NULL);
              StatsProvidingRecordReader statsRR;
              if (recordReader instanceof StatsProvidingRecordReader) {
                // ... stats extraction via the StatsProvidingRecordReader elided ...

        boolean statsAvailable = false;
        for(FileStatus file: fileList) {
          if (!file.isDir()) {
            InputFormat<?, ?> inputFormat = (InputFormat<?, ?>) ReflectionUtils.newInstance(
                partn.getInputFormatClass(), jc);
            InputSplit dummySplit = new FileSplit(file.getPath(), 0, 0,
                new String[] { partn.getLocation() });
            org.apache.hadoop.mapred.RecordReader<?, ?> recordReader =
                (org.apache.hadoop.mapred.RecordReader<?, ?>)
                inputFormat.getRecordReader(dummySplit, jc, Reporter.NULL);
            StatsProvidingRecordReader statsRR;
            // ... stats extraction via the StatsProvidingRecordReader elided ...
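The two snippets above, one per table and one per partition, share a trick: a zero-length FileSplit forces the input format to open the file, typically just its metadata footer, without scanning any rows, and if the resulting reader implements Hive's StatsProvidingRecordReader the statistics come for free. A condensed sketch of that probe follows; it assumes the Hive interface exposes its stats via getStats() returning a SerDeStats, as in the code base these snippets come from.

import java.io.IOException;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;

public class DummySplitStatsProbe {

  /** Returns stats if the format's reader can provide them, else null. */
  static SerDeStats probe(InputFormat<?, ?> inputFormat, FileStatus file, JobConf jc)
      throws IOException {
    // Zero length: the reader initializes (e.g. reads an ORC footer)
    // but never has to scan actual rows.
    InputSplit dummySplit = new FileSplit(file.getPath(), 0, 0, (String[]) null);
    RecordReader<?, ?> reader = inputFormat.getRecordReader(dummySplit, jc, Reporter.NULL);
    try {
      if (reader instanceof StatsProvidingRecordReader) {
        return ((StatsProvidingRecordReader) reader).getStats();
      }
      return null;
    } finally {
      reader.close();
    }
  }
}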

@SuppressWarnings("deprecation")
public class TestMockReporter {

  @Test
  public void testGetInputSplitForMapper() {
    InputSplit split = new MockReporter(MockReporter.ReporterType.Mapper, null).getInputSplit();
    assertTrue(null != split);
  }
  // ... further tests elided ...
}

  public HiveReadEntry getHiveReadEntry() {
    return hiveReadEntry;
  }

  public static InputSplit deserializeInputSplit(String base64, String className) throws IOException, ReflectiveOperationException{
    InputSplit split;
    if (Class.forName(className) == FileSplit.class) {
      split = new FileSplit((Path) null, 0, 0, (String[])null);
    } else {
      split = (InputSplit) Class.forName(className).getConstructor().newInstance();
    }
    ByteArrayDataInput byteArrayDataInput = ByteStreams.newDataInput(Base64.decodeBase64(base64));
    split.readFields(byteArrayDataInput);
    return split;
  }
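A plausible counterpart for the write direction, mirroring the helpers above, would pass the split's Writable output through the same Guava and commons-codec utilities. This is a hedged sketch, not verbatim from the same source; it assumes com.google.common.io.ByteStreams/ByteArrayDataOutput and org.apache.commons.codec.binary.Base64 are imported as in the snippet.

  public static String serializeInputSplit(InputSplit split) throws IOException {
    // write the split through its Writable contract, then Base64-encode
    ByteArrayDataOutput byteArrayDataOutput = ByteStreams.newDataOutput();
    split.write(byteArrayDataOutput);
    return Base64.encodeBase64String(byteArrayDataOutput.toByteArray());
  }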
