Package org.apache.hadoop.mapred

Examples of org.apache.hadoop.mapred.FileSplit
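A FileSplit describes the section of an input file (path, start offset, byte length, and optional host hints for locality) that a single map task processes. Before the excerpts below, here is a minimal self-contained sketch, assuming the old org.apache.hadoop.mapred API on the classpath; the input path is hypothetical. It builds a split over a whole file and reads it back with TextInputFormat:

    import java.io.IOException;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.FileSplit;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.RecordReader;
    import org.apache.hadoop.mapred.Reporter;
    import org.apache.hadoop.mapred.TextInputFormat;

    public class FileSplitExample {
      public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf();
        Path path = new Path("/tmp/input.txt"); // hypothetical input file
        FileStatus status = FileSystem.get(conf).getFileStatus(path);

        // One split covering the whole file; null hosts means no locality hints.
        FileSplit split = new FileSplit(path, 0, status.getLen(), (String[]) null);

        TextInputFormat format = new TextInputFormat();
        format.configure(conf); // TextInputFormat is JobConfigurable
        RecordReader<LongWritable, Text> reader =
            format.getRecordReader(split, conf, Reporter.NULL);
        LongWritable key = reader.createKey();
        Text value = reader.createValue();
        while (reader.next(key, value)) {
          System.out.println(key + "\t" + value);
        }
        reader.close();
      }
    }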


    // Read a job's Avro reduce output file directly with AvroRecordReader.
    AvroJob.setInputSchema(job, readerSchema);

    String dir = System.getProperty("test.dir", ".") + "/mapred";
    Path inputPath = new Path(dir + "/out" + "/myavro-r-00000.avro");
    FileStatus fileStatus = FileSystem.get(job).getFileStatus(inputPath);

    // One split spanning the entire output file.
    FileSplit fileSplit = new FileSplit(inputPath, 0, fileStatus.getLen(), job);

    AvroRecordReader<Pair<Integer, Long>> recordReader =
        new AvroRecordReader<Pair<Integer, Long>>(job, fileSplit);
    AvroWrapper<Pair<Integer, Long>> inputPair =
        new AvroWrapper<Pair<Integer, Long>>(null);


    // Same pattern against a second output file; here each datum is decoded
    // as a Utf8 string, and the counters below are filled by the read loop.
    AvroJob.setInputSchema(job, readerSchema);

    String dir = System.getProperty("test.dir", ".") + "/mapred";
    Path inputPath = new Path(dir + "/out" + "/myavro1-r-00000.avro");
    FileStatus fileStatus = FileSystem.get(job).getFileStatus(inputPath);
    FileSplit fileSplit = new FileSplit(inputPath, 0, fileStatus.getLen(), job);

    AvroWrapper<Utf8> inputPair = new AvroWrapper<Utf8>(null);
    NullWritable ignore = NullWritable.get();
    AvroRecordReader<Utf8> recordReader = new AvroRecordReader<Utf8>(job, fileSplit);
    long sumOfCounts = 0;
    long numOfCounts = 0;
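Both Avro excerpts stop just before the read loop. A sketch of the usual AvroRecordReader consumption pattern follows; what is done with each datum depends on the reader schema:

    while (recordReader.next(inputPair, ignore)) {
        // inputPair.datum() holds the current record, decoded with readerSchema
        numOfCounts++;
    }
    recordReader.close();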

    // Build a record reader for one Hive split: wrap the configuration in a
    // JobConf, construct the FileSplit, then copy serialization properties.
    private static RecordReader<?, ?> createRecordReader(HiveSplit split, Configuration configuration, Path wrappedPath)
    {
        final InputFormat<?, ?> inputFormat = getInputFormat(configuration, split.getSchema(), true);
        final JobConf jobConf = new JobConf(configuration);
        final FileSplit fileSplit = createFileSplit(wrappedPath, split.getStart(), split.getLength());

        // propagate serialization configuration to getRecordReader
        for (String name : split.getSchema().stringPropertyNames()) {
            if (name.startsWith("serialization.")) {
                jobConf.set(name, split.getSchema().getProperty(name));
            }
        }
        // ... the RecordReader construction itself appears in a later excerpt of this method
    }

    private static FileSplit createFileSplit(final Path path, long start, long length)
    {
        return new FileSplit(path, start, length, (String[]) null)
        {
            @Override
            public Path getPath()
            {
                // make sure our original path object is returned
                return path;
            }
        };
    }
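The anonymous subclass captures the final path parameter so that getPath() returns the exact Path object the caller passed in (the wrappedPath seen above), presumably to preserve whatever wrapping or filesystem state that object carries; a plain FileSplit only guarantees an equal Path, not the same instance.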

                // Symlink input: resolve each symlink split to its target file,
                // which may live on a different filesystem, and queue Hive splits for it.
                if (inputFormat instanceof SymlinkTextInputFormat) {
                    JobConf jobConf = new JobConf(configuration);
                    FileInputFormat.setInputPaths(jobConf, path);
                    InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
                    for (InputSplit rawSplit : splits) {
                        FileSplit split = ((SymlinkTextInputFormat.SymlinkTextInputSplit) rawSplit).getTargetSplit();

                        // get the filesystem for the target path -- it may be a different hdfs instance
                        FileSystem targetFilesystem = split.getPath().getFileSystem(configuration);
                        FileStatus fileStatus = targetFilesystem.getFileStatus(split.getPath());
                        hiveSplitSource.addToQueue(createHiveSplits(
                                partitionName,
                                fileStatus,
                                targetFilesystem.getFileBlockLocations(fileStatus, split.getStart(), split.getLength()),
                                split.getStart(),
                                split.getLength(),
                                schema,
                                partitionKeys,
                                false));
                    }
                    continue;
                }
                // ... (handling of non-symlink partitions follows in the full method)
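For context: Hive's SymlinkTextInputFormat reads "symlink" text files whose lines are paths to the real data files. Each SymlinkTextInputSplit therefore wraps a target FileSplit, which is why the loop above re-resolves the filesystem and block locations against the target path rather than the symlink's own path.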

    // Open a Chukwa sequence file directly: build a FileSplit for the byte
    // range [offset, end) and hand it to a SequenceFileRecordReader.
    JobConf conf = PigInputFormat.sJob;
    if (conf == null) {
      conf = new JobConf();
    }

    FileSplit split = new FileSplit(new Path(fileName), offset, end - offset,
        (String[]) null);
    reader = new SequenceFileRecordReader<ChukwaRecordKey, ChukwaRecord>(conf,
        split);
    if (reader.getValueClass() != ChukwaRecord.class)
      throw new IOException(
          "Value class of the sequence file must be ChukwaRecord"); // exact message elided in the excerpt
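Once constructed, the reader is consumed like any old-API RecordReader. A sketch of the usual pattern, with the per-record processing left open:

    ChukwaRecordKey key = reader.createKey();
    ChukwaRecord record = reader.createValue();
    while (reader.next(key, record)) {
        // process key/record here
    }
    reader.close();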

    // The same method, continued: the reader construction is wrapped in a
    // retry loop to ride out transient failures.
    private static RecordReader<?, ?> createRecordReader(HiveSplit split, Configuration configuration, Path wrappedPath)
    {
        final InputFormat<?, ?> inputFormat = getInputFormat(configuration, split.getSchema(), true);
        final JobConf jobConf = new JobConf(configuration);
        final FileSplit fileSplit = createFileSplit(wrappedPath, split.getStart(), split.getLength());

        try {
            return retry().stopOnIllegalExceptions().run("createRecordReader", new Callable<RecordReader<?, ?>>()
            {
                @Override
                public RecordReader<?, ?> call() throws IOException
                {
                    // plausible completion: delegate to the wrapped input format
                    return inputFormat.getRecordReader(fileSplit, jobConf, Reporter.NULL);
                }
            });
        }
        catch (Exception e) {
            throw new RuntimeException(e); // the excerpt's actual exception handling is elided
        }
    }


                // Another version of the symlink-resolution loop: here the splits are
                // pushed to markerQueue, which is marked finished after the loop.
                if (inputFormat instanceof SymlinkTextInputFormat) {
                    JobConf jobConf = new JobConf(configuration);
                    FileInputFormat.setInputPaths(jobConf, partitionPath);
                    InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
                    for (InputSplit rawSplit : splits) {
                        FileSplit split = ((SymlinkTextInputFormat.SymlinkTextInputSplit) rawSplit).getTargetSplit();

                        // get the filesystem for the target path -- it may be a different hdfs instance
                        FileSystem targetFilesystem = split.getPath().getFileSystem(configuration);
                        FileStatus fileStatus = targetFilesystem.getFileStatus(split.getPath());
                        markerQueue.addToQueue(createHiveSplits(
                                partitionName,
                                fileStatus,
                                targetFilesystem.getFileBlockLocations(fileStatus, split.getStart(), split.getLength()),
                                split.getStart(),
                                split.getLength(),
                                schema,
                                partitionKeys,
                                false));
                    }
                    markerQueue.finish();
                }

    // Fabricate synthetic splits: each "dummy" split exists only so that the
    // framework schedules numSplits map tasks; no real file data is read.
    public InputSplit[] getSplits(JobConf job,
                                  int numSplits) throws IOException {
      InputSplit[] result = new InputSplit[numSplits];
      Path outDir = job.getOutputPath();
      for (int i = 0; i < result.length; ++i) {
        result[i] = new FileSplit(new Path(outDir, "dummy-split-" + i), 0, 1, job);
      }
      return result;
    }
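An input format that fabricates splits this way needs a record reader that ignores the file contents. A minimal hedged sketch of such a companion reader (illustrative, not the original's code):

    public RecordReader<Text, Text> getRecordReader(InputSplit split, JobConf job,
                                                    Reporter reporter) throws IOException {
      return new RecordReader<Text, Text>() {
        private boolean done = false;

        public boolean next(Text key, Text value) {
          if (done) {
            return false;
          }
          done = true; // emit exactly one synthetic record per split
          return true;
        }
        public Text createKey() { return new Text(); }
        public Text createValue() { return new Text(); }
        public long getPos() { return 0; }
        public float getProgress() { return done ? 1.0f : 0.0f; }
        public void close() { }
      };
    }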
