Package org.apache.hadoop.mapred

Examples of org.apache.hadoop.mapred.FileSplit
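
The snippets below are collected from several projects that construct or consume FileSplit. For orientation, here is a minimal standalone sketch (the path and size are illustrative, not taken from the examples): a FileSplit names one byte range [start, start + length) of a single file, plus optional host hints for scheduling locality.

  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.mapred.FileSplit;

  FileSplit split = new FileSplit(new Path("/data/input.txt"), 0L, 64L << 20, new String[0]);
  split.getPath();    // /data/input.txt
  split.getStart();   // 0
  split.getLength();  // 67108864 (64 MB)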


        // sl iterates a SequenceFile listing in which each key is a file's
        // size in bytes; acc accumulates sizes, pos/last track split
        // boundaries, and cbrem counts bytes not yet assigned to a split.
        for (; sl.next(key, value); last = sl.getPosition()) {
          // If adding the next file would push the current split past the
          // target size, cut the split at the last boundary and start the
          // next file in a new split.
          if (acc + key.get() > targetsize && acc != 0) {
            long splitsize = last - pos;
            splits.add(new FileSplit(src, pos, splitsize, (String[])null));
            cbrem -= splitsize;
            pos = last;
            acc = 0L;
          }
          acc += key.get();
        }
      }
      finally {
        checkAndClose(sl);
      }
      if (cbrem != 0) {
        splits.add(new FileSplit(src, pos, cbrem, (String[])null));
      }

      return splits.toArray(new FileSplit[splits.size()]);
    }
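
This example packs files into splits greedily: file sizes from the SequenceFile listing accumulate in acc until the next file would overflow targetsize, then the split is cut at the last record boundary, and whatever remains becomes a final split. The same idea in isolation, as a hypothetical helper (names are illustrative, not from the original source):

  import java.util.ArrayList;
  import java.util.List;

  // Greedily cut a sequence of item sizes into groups of about targetsize bytes.
  static List<long[]> cutGroups(long[] sizes, long targetsize) {
    List<long[]> groups = new ArrayList<>();
    long acc = 0;
    int groupStart = 0;
    for (int i = 0; i < sizes.length; i++) {
      if (acc + sizes[i] > targetsize && acc != 0) {
        groups.add(new long[] { groupStart, i }); // items [groupStart, i)
        groupStart = i;
        acc = 0;
      }
      acc += sizes[i];
    }
    if (acc != 0) {
      groups.add(new long[] { groupStart, sizes.length }); // the remainder
    }
    return groups;
  }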


      return sideFs_.create(new Path(uri.getSchemeSpecificPart()));
    }
  }

  String getSideEffectFileName() {
    FileSplit split = StreamUtil.getCurrentSplit(job_);
    String leaf = split.getPath().getName();
    if (split.getStart() == 0) {
      return leaf;
    } else {
      return new FileSplit(new Path(leaf), split.getStart(), split.getLength()).toString();
    }
  }
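
The FileSplit in the else branch exists only for its string form: in the mapred API, FileSplit.toString() renders as path:start+length, so each non-initial split gets a distinct side-effect file name. A quick illustration (values made up):

  FileSplit s = new FileSplit(new Path("part-00000"), 2048L, 4096L, (String[]) null);
  s.toString(); // "part-00000:2048+4096"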

  static FileSplit getCurrentSplit(JobConf job) {
    // Reconstructs the current task's split from the per-task values the
    // framework localizes into the job configuration.
    String path = job.get("map.input.file");
    if (path == null) {
      return null;
    }
    Path p = new Path(path);
    long start = Long.parseLong(job.get("map.input.start"));
    long length = Long.parseLong(job.get("map.input.length"));
    return new FileSplit(p, start, length);
  }
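
map.input.file, map.input.start, and map.input.length are the per-task values the old mapred framework localizes into each map task's configuration. On Hadoop 2.x and later the same values are also published under the mapreduce.map.input.* names; a version-tolerant lookup might read both (a sketch, assuming both property families):

  String file = job.get("mapreduce.map.input.file", job.get("map.input.file"));
  long start = Long.parseLong(job.get("mapreduce.map.input.start", job.get("map.input.start")));
  long length = Long.parseLong(job.get("mapreduce.map.input.length", job.get("map.input.length")));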

  public void initIOContext(FileSplit split, JobConf job,
      Class inputFormatClass, RecordReader recordReader) throws IOException {

    boolean blockPointer = false;
    long blockStart = -1;
    FileSplit fileSplit = (FileSplit) split;
    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem(job);
    if (inputFormatClass.getName().contains("SequenceFile")) {
      SequenceFile.Reader in = new SequenceFile.Reader(fs, path, job);
      blockPointer = in.isBlockCompressed();
      // Sync to the first record boundary at or after the split start;
      // getPosition() then reports the true block start for this split.
      in.sync(fileSplit.getStart());
      blockStart = in.getPosition();
      in.close();
    } else if (recordReader instanceof RCFileRecordReader) {
      blockPointer = true;
      blockStart = ((RCFileRecordReader) recordReader).getStart();
    } else if (inputFormatClass.getName().contains("RCFile")) {
      blockPointer = true;
      RCFile.Reader in = new RCFile.Reader(fs, path, job);
      in.sync(fileSplit.getStart());
      blockStart = in.getPosition();
      in.close();
    }
    this.initIOContext(blockStart, blockPointer, path.makeQualified(fs).toString());
  }
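
The SequenceFile branch shows the general pattern for block-oriented formats: sync(n) advances the reader to the first sync boundary at or after byte n, and getPosition() then reports the true block start. A minimal read loop built from the same calls (a sketch; the key/value classes come from the file header):

  SequenceFile.Reader in = new SequenceFile.Reader(fs, path, job);
  Writable key = (Writable) ReflectionUtils.newInstance(in.getKeyClass(), job);
  Writable value = (Writable) ReflectionUtils.newInstance(in.getValueClass(), job);
  in.sync(fileSplit.getStart()); // skip to the first boundary inside the split
  long end = fileSplit.getStart() + fileSplit.getLength();
  while (in.getPosition() < end && in.next(key, value)) {
    // process key/value
  }
  in.close();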


  @Override
  public RecordReader getRecordReader(FragmentContext context, FileWork fileWork,
      List<SchemaPath> columns) throws ExecutionSetupException {
    Path path = getFileSystem().getUnderlying().makeQualified(new Path(fileWork.getPath()));
    FileSplit split = new FileSplit(path, fileWork.getStart(), fileWork.getLength(), new String[]{""});
    Preconditions.checkArgument(((TextFormatConfig)formatConfig).getDelimiter().length() == 1, "Only single character delimiter supported");
    return new DrillTextRecordReader(split, context, ((TextFormatConfig) formatConfig).getDelimiter().charAt(0), columns);
  }
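
The hosts array passed here is a single empty string, i.e. no real locality hints. Where block metadata is available, the hints would typically come from the filesystem; a sketch, assuming fs is the same underlying FileSystem used above:

  BlockLocation[] blocks = fs.getFileBlockLocations(
      fs.getFileStatus(path), fileWork.getStart(), fileWork.getLength());
  // getHosts() may throw IOException on some Hadoop versions.
  String[] hosts = blocks.length > 0 ? blocks[0].getHosts() : new String[0];
  FileSplit located = new FileSplit(path, fileWork.getStart(), fileWork.getLength(), hosts);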

    public RecordReader getRecordReader(
        InputSplit genericSplit, JobConf job,
        Reporter reporter)
        throws IOException {

      reporter.setStatus(genericSplit.toString());
      FileSplit split = (FileSplit)genericSplit;
      final Path file = split.getPath();
      FileSystem fs = file.getFileSystem(job);
      FSDataInputStream fileIn = fs.open(split.getPath());
      if (compressionCodecs != null && compressionCodecs.getCodec(file) != null)
        throw new RuntimeException("Not handling compression!");

      return new StreamXmlRecordReader(fileIn, split, reporter, job, FileSystem.get(job));
    }
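
compressionCodecs is not declared in this excerpt; it is the usual CompressionCodecFactory lookup, built roughly like this (sketch):

  CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job);
  CompressionCodec codec = compressionCodecs.getCodec(file); // null when no codec matches the extension

Compressed input is rejected outright, presumably because the XML record reader must seek within its split, which a compressed stream does not generally support.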

    }
    finally {
      os.close();
    }
   
    FileSplit split = new FileSplit(path, 0, fs.getFileStatus(path).getLen(), new String[0]);
    ARCFileRecordReader reader = new ARCFileRecordReader();
    reader.initialize(conf,split);
   
    int index = 0;
   

  private long start;
  private long end;

  public void initialize(Configuration conf,InputSplit split)throws IOException {
    this.conf = conf;
    FileSplit fileSplit = (FileSplit) split;
    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem(conf);
    InputStream in = null;
    if (fs instanceof NativeS3FileSystem) {
      if (conf.getBoolean(ARCFileInputFormat.USE_S3_INPUTSTREAM, false)) {
        in = new S3InputStream(path.toUri(), conf.get("fs.s3n.awsAccessKeyId"), conf.get("fs.s3n.awsSecretAccessKey"), 1048576);
      }
      else {
        // No dedicated S3 stream: relax JetS3t to plain HTTP and fall
        // through to the generic fs.open() below.
        Jets3tProperties properties = Jets3tProperties.getInstance(org.jets3t.service.Constants.JETS3T_PROPERTIES_FILENAME);
        properties.setProperty("s3service.https-only","false");
      }
    }
    if (in == null) {
      in = fs.open(path);
    }
    try {
      reader = new ARCFileReader(in);
    }
    catch (IOException e) {
      LOG.error(CCStringUtils.stringifyException(e));
      in.close();
      throw e;
    }
    start = fileSplit.getStart();
    end   = fileSplit.getLength();
    // ARC streams cannot be entered mid-file: the split must start at byte 0
    // and span the entire file.
    if (start != 0 || fs.getFileStatus(path).getLen() != end) {
      throw new IOException("Invalid FileSplit encountered! Split Details: " + split.toString());
    }
  }
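
The final check makes the whole-file requirement explicit: every split must cover one entire ARC file. The matching input format would typically enforce this by refusing to split at all (a sketch, assuming the old-API FileInputFormat base class):

  @Override
  protected boolean isSplitable(FileSystem fs, Path filename) {
    return false;
  }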

  }

  @Override
  public void setFile(String file, long offset, long length) {
    JobConf defaultConf = new JobConf();
    this.split = new FileSplit( new Path( file ), offset, length, defaultConf);

    this.hasMore = true;
    this.jobConf = defaultConf;
    //this.split = split;
    this.input_format = new TextInputFormat();
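
The FileSplit constructor taking a JobConf is the deprecated variant; the equivalent call with the current signature and no host hints would be (sketch):

  this.split = new FileSplit(new Path(file), offset, length, (String[]) null);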
