Package org.apache.hadoop.mapred

Examples of org.apache.hadoop.mapred.FileSplit

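The excerpts below appear to come from several open-source projects (Presto, Avro, Hadoop Streaming, Hive) and share one pattern: construct a FileSplit describing a byte range of a file, then hand it to a RecordReader. As a baseline, here is a minimal, self-contained sketch of that pattern using LineRecordReader; the input path is illustrative.

import java.io.IOException;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.LineRecordReader;

public class FileSplitExample {
  public static void main(String[] args) throws IOException {
    JobConf job = new JobConf();
    Path path = new Path(args[0]); // a text file on HDFS or the local FS

    // Cover the whole file with a single split; a real InputFormat would
    // normally produce one split per block.
    FileStatus status = path.getFileSystem(job).getFileStatus(path);
    FileSplit split = new FileSplit(path, 0, status.getLen(), (String[]) null);

    // LineRecordReader seeks to the split's start offset and stops reading
    // once it passes start + length.
    LineRecordReader reader = new LineRecordReader(job, split);
    LongWritable key = reader.createKey(); // byte offset of each line
    Text value = reader.createValue();     // the line's contents
    while (reader.next(key, value)) {
      System.out.println(key.get() + "\t" + value);
    }
    reader.close();
  }
}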

        // Reads an RCFile written with text serialization (ColumnarSerDe) through a
        // ColumnarTextHiveRecordCursor; outputFormat, jobConf, the COLUMN_* constants
        // and getColumns() come from the surrounding test class.
        RCFileInputFormat inputFormat = new RCFileInputFormat();
        @SuppressWarnings("deprecation")
        SerDe serde = new ColumnarSerDe();
        File file = File.createTempFile("presto_test", "rc-text");
        try {
            FileSplit split = createTestFile(file.getAbsolutePath(), outputFormat, serde, null);
            @SuppressWarnings("unchecked")
            RecordReader<?, BytesRefArrayWritable> recordReader = (RecordReader<?, BytesRefArrayWritable>) inputFormat.getRecordReader(split, jobConf, Reporter.NULL);
            Properties splitProperties = new Properties();
            splitProperties.setProperty("serialization.lib", "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe");
            splitProperties.setProperty("columns", COLUMN_NAMES_STRING);
            splitProperties.setProperty("columns.types", COLUMN_TYPES);
            RecordCursor cursor = new ColumnarTextHiveRecordCursor<>(recordReader,
                    split.getLength(),
                    splitProperties,
                    new ArrayList<HivePartitionKey>(),
                    getColumns(),
                    DateTimeZone.getDefault(),
                    DateTimeZone.getDefault(),
                    // ... (remaining constructor arguments and the rest of the
                    // try block are elided in this excerpt)


        // Same flow for binary serialization: LazyBinaryColumnarSerDe paired with a
        // ColumnarBinaryHiveRecordCursor.
        RCFileInputFormat inputFormat = new RCFileInputFormat();
        @SuppressWarnings("deprecation")
        SerDe serde = new LazyBinaryColumnarSerDe();
        File file = File.createTempFile("presto_test", "rc-binary");
        try {
            FileSplit split = createTestFile(file.getAbsolutePath(), outputFormat, serde, null);
            @SuppressWarnings("unchecked")
            RecordReader<?, BytesRefArrayWritable> recordReader = (RecordReader<?, BytesRefArrayWritable>) inputFormat.getRecordReader(split, jobConf, Reporter.NULL);
            Properties splitProperties = new Properties();
            splitProperties.setProperty("serialization.lib", "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe");
            splitProperties.setProperty("columns", COLUMN_NAMES_STRING);
            splitProperties.setProperty("columns.types", COLUMN_TYPES);
            RecordCursor cursor = new ColumnarBinaryHiveRecordCursor<>(recordReader,
                    split.getLength(),
                    splitProperties,
                    new ArrayList<HivePartitionKey>(),
                    getColumns(),
                    DateTimeZone.getDefault(),
                    new TypeRegistry());
            // ... (rest of the try block is elided in this excerpt)


  // Asserts that an InputSplit is a FileSplit with the expected start offset and length.
  private void checkSplit(InputSplit split, long start, long length) {
    assertThat(split, instanceOf(FileSplit.class));
    FileSplit fileSplit = (FileSplit) split;
    assertThat(fileSplit.getStart(), is(start));
    assertThat(fileSplit.getLength(), is(length));
  }
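
A hedged sketch of how such a helper might be driven; the input path and file length are illustrative, not from the original test:

  JobConf job = new JobConf();
  FileInputFormat.setInputPaths(job, new Path("/tmp/input.txt")); // hypothetical fixture
  TextInputFormat format = new TextInputFormat();
  format.configure(job);
  InputSplit[] splits = format.getSplits(job, 1); // ask for a single split
  long fileLen = 1234L;                           // hypothetical length of the fixture file
  checkSplit(splits[0], 0, fileLen);              // one split should cover the whole file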

    // Builds a FileSplit covering an entire Avro output file, then reads it back
    // with AvroRecordReader.
    AvroJob.setInputSchema(job, readerSchema);
   
    String dir = System.getProperty("test.dir", ".") + "/mapred";
    Path inputPath = new Path(dir + "/out" + "/part-00000" + AvroOutputFormat.EXT);
    FileStatus fileStatus = FileSystem.get(job).getFileStatus(inputPath);
    FileSplit fileSplit = new FileSplit(inputPath, 0, fileStatus.getLen(), job);
   
    AvroRecordReader<Pair<Integer, Long>> recordReader = new AvroRecordReader<Pair<Integer, Long>>(job, fileSplit);
   
    AvroWrapper<Pair<Integer, Long>> inputPair = new AvroWrapper<Pair<Integer, Long>>(null);
    NullWritable ignore = NullWritable.get();
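
    // Plausible continuation of this excerpt (the original test body is elided
    // here): drain the reader and inspect each deserialized pair.
    while (recordReader.next(inputPair, ignore)) {
      Pair<Integer, Long> pair = inputPair.datum();
      System.out.println(pair.key() + " => " + pair.value());
    }
    recordReader.close();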

      return sideFs_.create(new Path(uri.getSchemeSpecificPart()));
    }
  }

  // Derives a per-task file name for side-effect output. For a split that does not
  // start at offset 0, FileSplit.toString() renders as "<path>:<start>+<length>",
  // which keeps names from colliding across splits of the same file.
  String getSideEffectFileName() {
    FileSplit split = StreamUtil.getCurrentSplit(job_);
    String leaf = split.getPath().getName();
    if (split.getStart() == 0) {
      return leaf;
    } else {
      return new FileSplit(new Path(leaf), split.getStart(),
                           split.getLength(), job_).toString();
    }
  }

      return null;
    }
    // Rebuilds the current task's FileSplit from the job properties that the old
    // mapred API sets for every map task ("map.input.file" supplied the path above).
    Path p = new Path(path);
    long start = Long.parseLong(job.get("map.input.start"));
    long length = Long.parseLong(job.get("map.input.length"));
    return new FileSplit(p, start, length, job);
  }
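
The same pattern is often used inside a mapper's configure() method. A minimal sketch with a hypothetical class name; the property keys are the ones the old mapred API sets:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;

public class SplitAwareMapper extends MapReduceBase {
  private FileSplit split;

  @Override
  public void configure(JobConf job) {
    split = new FileSplit(
        new Path(job.get("map.input.file")), // path of the current input file
        job.getLong("map.input.start", 0),   // split start offset in bytes
        job.getLong("map.input.length", 0),  // split length in bytes
        job);                                // deprecated ctor, matching the snippet above
  }
}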

    if (length < 0 || (start + length) > fileLen) {
      length = fileLen - start;
    }

    // Share the code path with RecordReader: wrap the requested byte range in a
    // FileSplit and let RCFileRecordReader handle positioning within the file.
    FileSplit split = new FileSplit(fileName, start, length, new JobConf(conf));
    RCFileRecordReader recordReader = new RCFileRecordReader(conf, split);
    LongWritable key = new LongWritable();
    BytesRefArrayWritable value = new BytesRefArrayWritable();
    StringBuilder buf = new StringBuilder(STRING_BUFFER_SIZE); // extra capacity in case we overrun, to avoid resizing
    while (recordReader.next(key, value)) {
      // ... (loop body elided in this excerpt)

    }
    InputFormat inputFormat = HiveInputFormat.getInputFormatFromCache(
        inputFormatClass, jobConf);

    // Create a FileSplit for the given partition of the combined Hive split.
    FileSplit fsplit = new FileSplit(
        hsplit.getPaths()[partition],
        hsplit.getStartOffsets()[partition],
        hsplit.getLengths()[partition],
        hsplit.getLocations());

    this.setRecordReader(inputFormat.getRecordReader(fsplit, jobConf, reporter));

  // A FileSplit subclass that wraps the split of the file a symlink points to.
  public static class SymlinkTextInputSplit extends FileSplit {
    private final FileSplit split;

    // Writable implementations need a no-arg constructor so the framework can
    // instantiate the split before populating it via readFields().
    public SymlinkTextInputSplit() {
      super((Path)null, 0, 0, (String[])null);
      split = new FileSplit((Path)null, 0, 0, (String[])null);
    }
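
    // A plausible sketch (not verified against the original class) of the Writable
    // plumbing such a wrapper needs: the framework serializes splits when shipping
    // them to tasks, delegating here to the wrapped FileSplit.
    @Override
    public void write(DataOutput out) throws IOException {
      split.write(out);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
      split.readFields(in);
    }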

  public void initIOContext(FileSplit split, JobConf job,
      Class inputFormatClass, RecordReader recordReader) throws IOException {

    boolean blockPointer = false;
    long blockStart = -1;
    FileSplit fileSplit = split; // the parameter is already a FileSplit; no cast needed
    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem(job);
    // Find where the first complete record at or after the split's start offset
    // begins: sync() seeks to the next sync marker and getPosition() records it.
    if (inputFormatClass.getName().contains("SequenceFile")) {
      SequenceFile.Reader in = new SequenceFile.Reader(fs, path, job);
      blockPointer = in.isBlockCompressed();
      in.sync(fileSplit.getStart());
      blockStart = in.getPosition();
      in.close();
    } else if (recordReader instanceof RCFileRecordReader) {
      blockPointer = true;
      blockStart = ((RCFileRecordReader) recordReader).getStart();
    } else if (inputFormatClass.getName().contains("RCFile")) {
      blockPointer = true;
      RCFile.Reader in = new RCFile.Reader(fs, path, job);
      in.sync(fileSplit.getStart());
      blockStart = in.getPosition();
      in.close();
    }
    this.initIOContext(blockStart, blockPointer, split.getPath().toString());