Examples of TextInputFormat
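
A quick note on the pattern the examples below share: with the old org.apache.hadoop.mapred API, a TextInputFormat is configured from a JobConf, asked for InputSplits, and then asked for a RecordReader per split that yields byte-offset keys and line values. A minimal self-contained sketch of that flow (the input path and split count here are illustrative assumptions, not taken from any of the examples):

    import java.io.IOException;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.*;

    public class TextInputFormatSketch {
      public static void main(String[] args) throws IOException {
        JobConf job = new JobConf();
        FileInputFormat.setInputPaths(job, new Path("/tmp/input"));  // assumed input directory

        TextInputFormat input = new TextInputFormat();
        input.configure(job);                            // picks up compression settings from the job

        InputSplit[] splits = input.getSplits(job, 2);   // 2 is only a hint for the number of splits
        for (InputSplit split : splits) {
          RecordReader<LongWritable, Text> reader =
              input.getRecordReader(split, job, Reporter.NULL);
          LongWritable key = reader.createKey();         // byte offset of the line within the file
          Text value = reader.createValue();             // the line itself, without the terminator
          while (reader.next(key, value)) {
            System.out.println(key + "\t" + value);
          }
          reader.close();
        }
      }
    }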


Examples of org.apache.hadoop.mapred.TextInputFormat

    JobConf job = new JobConf();
    job.setNumMapTasks(numMaps);

    FileInputFormat.setInputPaths(job, dataPath);

    // retrieve the splits
    TextInputFormat input = (TextInputFormat) job.getInputFormat();
    InputSplit[] splits = input.getSplits(job, numMaps);

    InputSplit[] sorted = Arrays.copyOf(splits, splits.length);
    Builder.sortSplits(sorted);

    Step0OutputCollector collector = new Step0OutputCollector(numMaps);
    Reporter reporter = Reporter.NULL;

    for (int p = 0; p < numMaps; p++) {
      InputSplit split = sorted[p];
      RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter);

      LongWritable key = reader.createKey();
      Text value = reader.createValue();

      Step0Mapper mapper = new Step0Mapper();

Examples of org.apache.hadoop.mapred.TextInputFormat

    JobConf job = new JobConf();
    job.setNumMapTasks(numMaps);
    FileInputFormat.setInputPaths(job, dataPath);

    // retrieve the splits
    TextInputFormat input = (TextInputFormat) job.getInputFormat();
    InputSplit[] splits = input.getSplits(job, numMaps);

    InputSplit[] sorted = Arrays.copyOf(splits, splits.length);
    Builder.sortSplits(sorted);

    Reporter reporter = Reporter.NULL;

    int[] keys = new int[numMaps];
    Step0Output[] values = new Step0Output[numMaps];
   
    int[] expectedIds = new int[numMaps];
   
    for (int p = 0; p < numMaps; p++) {
      InputSplit split = sorted[p];
      RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter);

      LongWritable key = reader.createKey();
      Text value = reader.createValue();

      Long firstKey = null;
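The example is cut off right after firstKey is declared; a plausible continuation (a sketch, under the assumption that the test records the first key and the record count of each split) looks like this:

      int size = 0;
      while (reader.next(key, value)) {
        if (firstKey == null) {
          firstKey = key.get();   // byte offset of the first record in this split
        }
        size++;
      }
      // firstKey and size would then feed the per-partition bookkeeping in the
      // keys[], values[] and expectedIds[] arrays declared above (details omitted).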

Examples of org.apache.hadoop.mapred.TextInputFormat

  public RecordReader<LongWritable, Text> getRecordReader(
      InputSplit split, JobConf job, Reporter reporter) throws IOException {
    InputSplit targetSplit = ((SymlinkTextInputSplit)split).getTargetSplit();

    // The target data is in TextInputFormat.
    TextInputFormat inputFormat = new TextInputFormat();
    inputFormat.configure(job);
    return inputFormat.getRecordReader(targetSplit, job, reporter);
  }

Examples of org.apache.hadoop.mapred.TextInputFormat

    if (targetPaths.size() == 0) {
      return new InputSplit[0];
    }

    // The input should be in TextInputFormat.
    TextInputFormat inputFormat = new TextInputFormat();
    JobConf newjob = new JobConf(job);
    newjob.setInputFormat(TextInputFormat.class);
    inputFormat.configure(newjob);

    List<InputSplit> result = new ArrayList<InputSplit>();

    // ceil(numSplits / numPaths), so we can get at least numSplits splits.
    int numPaths = targetPaths.size();
    int numSubSplits = (numSplits + numPaths - 1) / numPaths;

    // For each path, do getSplits().
    for (int i = 0; i < numPaths; ++i) {
      Path targetPath = targetPaths.get(i);
      Path symlinkPath = symlinkPaths.get(i);

      FileInputFormat.setInputPaths(newjob, targetPath);

      InputSplit[] iss = inputFormat.getSplits(newjob, numSubSplits);
      for (InputSplit is : iss) {
        result.add(new SymlinkTextInputSplit(symlinkPath, (FileSplit)is));
      }
    }
    return result.toArray(new InputSplit[result.size()]);
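The numSubSplits computation above is integer ceiling division, which guarantees the per-path getSplits() calls add up to at least numSplits. With illustrative numbers (not taken from the example):

    int numSplits = 10, numPaths = 3;
    int numSubSplits = (numSplits + numPaths - 1) / numPaths; // (10 + 2) / 3 = 4
    // 3 paths x 4 sub-splits each = up to 12 splits, which is >= the 10 requested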

Examples of org.apache.hadoop.mapred.TextInputFormat

  TextInputFormat format;
  JobConf job;

  public Base64TextInputFormat() {
    format = new TextInputFormat();
  }
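The constructor above only shows the wrapped TextInputFormat being created; a typical delegating wrapper along these lines (a sketch of the pattern, not necessarily the exact Base64TextInputFormat body) forwards configuration and split computation to the inner format:

  public void configure(JobConf conf) {
    this.job = conf;
    format.configure(conf);                   // let the wrapped TextInputFormat see the job settings
  }

  public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException {
    return format.getSplits(conf, numSplits); // splits come straight from the wrapped format
  }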

Examples of org.apache.hadoop.mapred.TextInputFormat

  public RecordReader<LongWritable, Text> getRecordReader(
      InputSplit split, JobConf job, Reporter reporter) throws IOException {
    InputSplit targetSplit = ((SymlinkTextInputSplit)split).getTargetSplit();

    // The target data is in TextInputFormat.
    TextInputFormat inputFormat = new TextInputFormat();
    inputFormat.configure(job);
    RecordReader innerReader = null;
    try {
      innerReader = inputFormat.getRecordReader(targetSplit, job,
          reporter);
    } catch (Exception e) {
      innerReader = HiveIOExceptionHandlerUtil
          .handleRecordReaderCreationException(e, job);
    }

Examples of org.apache.hadoop.mapred.TextInputFormat

    final int numFiles = 10;

    createFiles(length, numFiles, random);

    // create a combined split for the files
    TextInputFormat wrappedFormat = new TextInputFormat();
    wrappedFormat.configure(job);
    TezGroupedSplitsInputFormat<LongWritable , Text> format =
        new TezGroupedSplitsInputFormat<LongWritable, Text>();
    format.setConf(job);
    format.setDesiredNumberOfSplits(1);
    format.setInputFormat(wrappedFormat);
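The snippet stops once the grouped format is configured; the presumable next step (an assumption based on the usual InputFormat contract, not shown in the example) is to ask it for the combined splits:

    // With setDesiredNumberOfSplits(1), the splits of all ten files are grouped together.
    InputSplit[] groupedSplits = format.getSplits(job, 1);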