Package org.apache.hadoop.mapred

Examples of org.apache.hadoop.mapred.FileSplit
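A FileSplit describes the section of an input file (path, start offset, byte length, and optional host hints for locality) that a single map task processes. Before the excerpts below, here is a minimal self-contained sketch, assuming the old org.apache.hadoop.mapred API on the classpath; the input path is hypothetical. It builds a split over a whole file and reads it back with TextInputFormat:

    import java.io.IOException;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.FileSplit;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.RecordReader;
    import org.apache.hadoop.mapred.Reporter;
    import org.apache.hadoop.mapred.TextInputFormat;

    public class FileSplitExample {
      public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf();
        Path path = new Path("/tmp/input.txt"); // hypothetical input file
        FileStatus status = FileSystem.get(conf).getFileStatus(path);

        // One split covering the whole file; null hosts means no locality hints.
        FileSplit split = new FileSplit(path, 0, status.getLen(), (String[]) null);

        TextInputFormat format = new TextInputFormat();
        format.configure(conf); // TextInputFormat is JobConfigurable
        RecordReader<LongWritable, Text> reader =
            format.getRecordReader(split, conf, Reporter.NULL);
        LongWritable key = reader.createKey();
        Text value = reader.createValue();
        while (reader.next(key, value)) {
          System.out.println(key + "\t" + value);
        }
        reader.close();
      }
    }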


    // Read a job's Avro reduce output file directly with AvroRecordReader.
    AvroJob.setInputSchema(job, readerSchema);

    String dir = System.getProperty("test.dir", ".") + "/mapred";
    Path inputPath = new Path(dir + "/out" + "/myavro-r-00000.avro");
    FileStatus fileStatus = FileSystem.get(job).getFileStatus(inputPath);

    // One split spanning the entire output file.
    FileSplit fileSplit = new FileSplit(inputPath, 0, fileStatus.getLen(), job);

    AvroRecordReader<Pair<Integer, Long>> recordReader =
        new AvroRecordReader<Pair<Integer, Long>>(job, fileSplit);
    AvroWrapper<Pair<Integer, Long>> inputPair =
        new AvroWrapper<Pair<Integer, Long>>(null);


    // Same pattern against a second output file; here each datum is decoded
    // as a Utf8 string, and the counters below are filled by the read loop.
    AvroJob.setInputSchema(job, readerSchema);

    String dir = System.getProperty("test.dir", ".") + "/mapred";
    Path inputPath = new Path(dir + "/out" + "/myavro1-r-00000.avro");
    FileStatus fileStatus = FileSystem.get(job).getFileStatus(inputPath);
    FileSplit fileSplit = new FileSplit(inputPath, 0, fileStatus.getLen(), job);

    AvroWrapper<Utf8> inputPair = new AvroWrapper<Utf8>(null);
    NullWritable ignore = NullWritable.get();
    AvroRecordReader<Utf8> recordReader = new AvroRecordReader<Utf8>(job, fileSplit);
    long sumOfCounts = 0;
    long numOfCounts = 0;
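Both Avro excerpts stop just before the read loop. A sketch of the usual AvroRecordReader consumption pattern follows; what is done with each datum depends on the reader schema:

    while (recordReader.next(inputPair, ignore)) {
        // inputPair.datum() holds the current record, decoded with readerSchema
        numOfCounts++;
    }
    recordReader.close();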

    // Build a record reader for one Hive split: wrap the configuration in a
    // JobConf, construct the FileSplit, then copy serialization properties.
    private static RecordReader<?, ?> createRecordReader(HiveSplit split, Configuration configuration, Path wrappedPath)
    {
        final InputFormat<?, ?> inputFormat = getInputFormat(configuration, split.getSchema(), true);
        final JobConf jobConf = new JobConf(configuration);
        final FileSplit fileSplit = createFileSplit(wrappedPath, split.getStart(), split.getLength());

        // propagate serialization configuration to getRecordReader
        for (String name : split.getSchema().stringPropertyNames()) {
            if (name.startsWith("serialization.")) {
                jobConf.set(name, split.getSchema().getProperty(name));
            }
        }
        // ... the RecordReader construction itself appears in a later excerpt of this method
    }

    private static FileSplit createFileSplit(final Path path, long start, long length)
    {
        return new FileSplit(path, start, length, (String[]) null)
        {
            @Override
            public Path getPath()
            {
                // make sure our original path object is returned
                return path;
            }
        };
    }
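The anonymous subclass captures the final path parameter so that getPath() returns the exact Path object the caller passed in (the wrappedPath seen above), presumably to preserve whatever wrapping or filesystem state that object carries; a plain FileSplit only guarantees an equal Path, not the same instance.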

                // Symlink input: resolve each symlink split to its target file,
                // which may live on a different filesystem, and queue Hive splits for it.
                if (inputFormat instanceof SymlinkTextInputFormat) {
                    JobConf jobConf = new JobConf(configuration);
                    FileInputFormat.setInputPaths(jobConf, path);
                    InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
                    for (InputSplit rawSplit : splits) {
                        FileSplit split = ((SymlinkTextInputFormat.SymlinkTextInputSplit) rawSplit).getTargetSplit();

                        // get the filesystem for the target path -- it may be a different hdfs instance
                        FileSystem targetFilesystem = split.getPath().getFileSystem(configuration);
                        FileStatus fileStatus = targetFilesystem.getFileStatus(split.getPath());
                        hiveSplitSource.addToQueue(createHiveSplits(
                                partitionName,
                                fileStatus,
                                targetFilesystem.getFileBlockLocations(fileStatus, split.getStart(), split.getLength()),
                                split.getStart(),
                                split.getLength(),
                                schema,
                                partitionKeys,
                                false));
                    }
                    continue;
                }
                // ... (handling of non-symlink partitions follows in the full method)
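For context: Hive's SymlinkTextInputFormat reads "symlink" text files whose lines are paths to the real data files. Each SymlinkTextInputSplit therefore wraps a target FileSplit, which is why the loop above re-resolves the filesystem and block locations against the target path rather than the symlink's own path.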

    // Open a Chukwa sequence file directly: build a FileSplit for the byte
    // range [offset, end) and hand it to a SequenceFileRecordReader.
    JobConf conf = PigInputFormat.sJob;
    if (conf == null) {
      conf = new JobConf();
    }

    FileSplit split = new FileSplit(new Path(fileName), offset, end - offset,
        (String[]) null);
    reader = new SequenceFileRecordReader<ChukwaRecordKey, ChukwaRecord>(conf,
        split);
    if (reader.getValueClass() != ChukwaRecord.class)
      throw new IOException(
          "Value class of the sequence file must be ChukwaRecord"); // exact message elided in the excerpt
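Once constructed, the reader is consumed like any old-API RecordReader. A sketch of the usual pattern, with the per-record processing left open:

    ChukwaRecordKey key = reader.createKey();
    ChukwaRecord record = reader.createValue();
    while (reader.next(key, record)) {
        // process key/record here
    }
    reader.close();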

    // The same method, continued: the reader construction is wrapped in a
    // retry loop to ride out transient failures.
    private static RecordReader<?, ?> createRecordReader(HiveSplit split, Configuration configuration, Path wrappedPath)
    {
        final InputFormat<?, ?> inputFormat = getInputFormat(configuration, split.getSchema(), true);
        final JobConf jobConf = new JobConf(configuration);
        final FileSplit fileSplit = createFileSplit(wrappedPath, split.getStart(), split.getLength());

        try {
            return retry().stopOnIllegalExceptions().run("createRecordReader", new Callable<RecordReader<?, ?>>()
            {
                @Override
                public RecordReader<?, ?> call() throws IOException
                {
                    // plausible completion: delegate to the wrapped input format
                    return inputFormat.getRecordReader(fileSplit, jobConf, Reporter.NULL);
                }
            });
        }
        catch (Exception e) {
            throw new RuntimeException(e); // the excerpt's actual exception handling is elided
        }
    }


                // Another version of the symlink-resolution loop: here the splits are
                // pushed to markerQueue, which is marked finished after the loop.
                if (inputFormat instanceof SymlinkTextInputFormat) {
                    JobConf jobConf = new JobConf(configuration);
                    FileInputFormat.setInputPaths(jobConf, partitionPath);
                    InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
                    for (InputSplit rawSplit : splits) {
                        FileSplit split = ((SymlinkTextInputFormat.SymlinkTextInputSplit) rawSplit).getTargetSplit();

                        // get the filesystem for the target path -- it may be a different hdfs instance
                        FileSystem targetFilesystem = split.getPath().getFileSystem(configuration);
                        FileStatus fileStatus = targetFilesystem.getFileStatus(split.getPath());
                        markerQueue.addToQueue(createHiveSplits(
                                partitionName,
                                fileStatus,
                                targetFilesystem.getFileBlockLocations(fileStatus, split.getStart(), split.getLength()),
                                split.getStart(),
                                split.getLength(),
                                schema,
                                partitionKeys,
                                false));
                    }
                    markerQueue.finish();
                }

    // Fabricate synthetic splits: each "dummy" split exists only so that the
    // framework schedules numSplits map tasks; no real file data is read.
    public InputSplit[] getSplits(JobConf job,
                                  int numSplits) throws IOException {
      InputSplit[] result = new InputSplit[numSplits];
      Path outDir = job.getOutputPath();
      for (int i = 0; i < result.length; ++i) {
        result[i] = new FileSplit(new Path(outDir, "dummy-split-" + i), 0, 1, job);
      }
      return result;
    }
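An input format that fabricates splits this way needs a record reader that ignores the file contents. A minimal hedged sketch of such a companion reader (illustrative, not the original's code):

    public RecordReader<Text, Text> getRecordReader(InputSplit split, JobConf job,
                                                    Reporter reporter) throws IOException {
      return new RecordReader<Text, Text>() {
        private boolean done = false;

        public boolean next(Text key, Text value) {
          if (done) {
            return false;
          }
          done = true; // emit exactly one synthetic record per split
          return true;
        }
        public Text createKey() { return new Text(); }
        public Text createValue() { return new Text(); }
        public long getPos() { return 0; }
        public float getProgress() { return done ? 1.0f : 0.0f; }
        public void close() { }
      };
    }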
