Examples of org.apache.hadoop.mapred.TextInputFormat

    writeFile(localFs, new Path(workDir, "part2.txt.gz"), gzip,
              "is\ngzip\n");
    writeFile(localFs, new Path(workDir, "part3.txt.gz"), gzip,
        "one\nmore\nsplit\n");
    FileInputFormat.setInputPaths(job, workDir);
    TextInputFormat wrappedFormat = new TextInputFormat();
    wrappedFormat.configure(job);
    TezGroupedSplitsInputFormat<LongWritable, Text> format =
        new TezGroupedSplitsInputFormat<LongWritable, Text>();
    format.setConf(job);
    format.setInputFormat(wrappedFormat);
   
View Full Code Here
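
The snippet above is truncated mid-test. For orientation, here is a minimal, self-contained sketch of the same old-API read pattern without the Tez grouping layer; the input path is a placeholder and the wrapper class is our own scaffolding, not part of the original test:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

public class TextInputFormatDemo {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf();
    FileInputFormat.setInputPaths(job, new Path("/tmp/work")); // placeholder dir
    TextInputFormat format = new TextInputFormat();
    format.configure(job); // picks up compression codecs, so .gz inputs decode transparently

    for (InputSplit split : format.getSplits(job, 1)) {
      RecordReader<LongWritable, Text> reader =
          format.getRecordReader(split, job, Reporter.NULL);
      LongWritable key = reader.createKey(); // byte offset of each line
      Text value = reader.createValue();     // the line contents
      while (reader.next(key, value)) {
        System.out.println(key + "\t" + value);
      }
      reader.close();
    }
  }
}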

Examples of org.apache.hadoop.mapred.TextInputFormat

  }

  @Override
  protected void runJob(JobConf job) throws IOException {
    // retrieve the splits
    TextInputFormat input = (TextInputFormat) job.getInputFormat();
    InputSplit[] splits = input.getSplits(job, job.getNumMapTasks());
    log.debug("Nb splits : " + splits.length);

    InputSplit[] sorted = Arrays.copyOf(splits, splits.length);
    Builder.sortSplits(sorted);

    int numTrees = Builder.getNbTrees(job); // total number of trees

    firstOutput = new PartialOutputCollector(numTrees);
    Reporter reporter = Reporter.NULL;

    firstIds = new int[splits.length];
    sizes = new int[splits.length];
   
    // to compute firstIds, process the splits in file order
    int firstId = 0;
    long slowest = 0; // duration of slowest map
    for (InputSplit split : splits) {
      int hp = ArrayUtils.indexOf(sorted, split); // hadoop's partition

      RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter);

      LongWritable key = reader.createKey();
      Text value = reader.createValue();

      Step1Mapper mapper = new MockStep1Mapper(getTreeBuilder(), dataset, getSeed(),
View Full Code Here
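
The loop is cut off before the bookkeeping it announces. A hedged sketch of how firstIds and sizes can be filled "in file order", assuming firstIds[hp] should hold the global index of a split's first record as the comment suggests:

    // Sketch only: count records per split in file order and accumulate a
    // running first-record id. Uses the same input/job/reporter as above.
    for (InputSplit split : splits) {
      int hp = ArrayUtils.indexOf(sorted, split); // hadoop's partition
      RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter);
      LongWritable key = reader.createKey();
      Text value = reader.createValue();
      int size = 0;
      while (reader.next(key, value)) {
        size++; // TextInputFormat yields one record per line
      }
      reader.close();
      firstIds[hp] = firstId;
      sizes[hp] = size;
      firstId += size;
    }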

Examples of org.apache.hadoop.mapred.TextInputFormat

   *
   */
  protected void secondStep(JobConf job, Path forestPath,
      PredictionCallback callback) throws IOException {
    // retrieve the splits
    TextInputFormat input = (TextInputFormat) job.getInputFormat();
    InputSplit[] splits = input.getSplits(job, job.getNumMapTasks());
    log.debug("Nb splits : " + splits.length);

    Builder.sortSplits(splits);

    int numTrees = Builder.getNbTrees(job); // total number of trees

    // compute the expected number of outputs
    int total = 0;
    for (int p = 0; p < splits.length; p++) {
      total += Step2Mapper.nbConcerned(splits.length, numTrees, p);
    }

    secondOutput = new PartialOutputCollector(total);
    Reporter reporter = Reporter.NULL;
    long slowest = 0; // duration of slowest map

    for (int partition = 0; partition < splits.length; partition++) {
      InputSplit split = splits[partition];
      RecordReader<LongWritable, Text> reader = input.getRecordReader(split,
          job, reporter);

      LongWritable key = reader.createKey();
      Text value = reader.createValue();

View Full Code Here
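
Note the sizing trick in this snippet: summing Step2Mapper.nbConcerned over every partition tells the test exactly how many outputs the maps will emit, so secondOutput can be allocated once with the right capacity rather than grown as records arrive.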

Examples of org.apache.hadoop.mapred.TextInputFormat

    job.setNumMapTasks(numMaps);

    FileInputFormat.setInputPaths(job, dataPath);

    // retrieve the splits
    TextInputFormat input = (TextInputFormat) job.getInputFormat();
    InputSplit[] splits = input.getSplits(job, numMaps);

    InputSplit[] sorted = Arrays.copyOf(splits, splits.length);
    Builder.sortSplits(sorted);

    Step0OutputCollector collector = new Step0OutputCollector(numMaps);
    Reporter reporter = Reporter.NULL;

    for (int p = 0; p < numMaps; p++) {
      InputSplit split = sorted[p];
      RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter);

      LongWritable key = reader.createKey();
      Text value = reader.createValue();

      Step0Mapper mapper = new Step0Mapper();
View Full Code Here

Examples of org.apache.hadoop.mapred.TextInputFormat

    JobConf job = new JobConf();
    job.setNumMapTasks(numMaps);
    FileInputFormat.setInputPaths(job, dataPath);

    // retrieve the splits
    TextInputFormat input = (TextInputFormat) job.getInputFormat();
    InputSplit[] splits = input.getSplits(job, numMaps);

    InputSplit[] sorted = Arrays.copyOf(splits, splits.length);
    Builder.sortSplits(sorted);

    Reporter reporter = Reporter.NULL;

    int[] keys = new int[numMaps];
    Step0Output[] values = new Step0Output[numMaps];
   
    int[] expectedIds = new int[numMaps];
   
    for (int p = 0; p < numMaps; p++) {
      InputSplit split = sorted[p];
      RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter);

      LongWritable key = reader.createKey();
      Text value = reader.createValue();

      Long firstKey = null;
View Full Code Here
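
The snippet stops right after firstKey is declared. A plausible continuation, sketched from the surrounding variables (the real test presumably compares expectedIds against Step0's reported first ids):

      // Sketch: capture the byte offset of the split's first line.
      if (reader.next(key, value)) {
        firstKey = key.get();
        expectedIds[p] = (int) key.get();
      }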

Examples of org.apache.hadoop.mapred.TextInputFormat

    final int numFiles = 10;

    createFiles(length, numFiles, random);

    // create a combined split for the files
    TextInputFormat wrappedFormat = new TextInputFormat();
    wrappedFormat.configure(job);
    TezGroupedSplitsInputFormat<LongWritable, Text> format =
        new TezGroupedSplitsInputFormat<LongWritable, Text>();
    format.setConf(job);
    format.setDesiredNumberOfSplits(1);
    format.setInputFormat(wrappedFormat);
View Full Code Here
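
With setDesiredNumberOfSplits(1), the grouping layer should collapse all ten files into a single split. A hedged sketch of what the test can then do (the read-through check is our assumption, not quoted from the test):

    // Sketch: request the grouped splits and read everything through one reader.
    InputSplit[] groupedSplits = format.getSplits(job, 1);
    RecordReader<LongWritable, Text> reader =
        format.getRecordReader(groupedSplits[0], job, Reporter.NULL);
    LongWritable key = reader.createKey();
    Text value = reader.createValue();
    int lines = 0;
    while (reader.next(key, value)) {
      lines++; // a single grouped split should surface every line from all numFiles files
    }
    reader.close();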

Examples of org.apache.hadoop.mapred.TextInputFormat

      }
      Collections.sort(columnIds);
    }
    targetRecordCount = context.getConfig().getInt(ExecConstants.TEXT_LINE_READER_BATCH_SIZE);
    numCols = columnIds.size();
    TextInputFormat inputFormat = new TextInputFormat();
    JobConf job = new JobConf();
    job.setInt("io.file.buffer.size", context.getConfig().getInt(ExecConstants.TEXT_LINE_READER_BUFFER_SIZE));
    job.setInputFormat(inputFormat.getClass());
    try {
      reader = inputFormat.getRecordReader(split, job, Reporter.NULL);
      key = reader.createKey();
      value = reader.createValue();
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
View Full Code Here
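
For context, a hedged sketch of how such a reader typically drains the record reader it just opened, one batch at a time (the column-parsing step is elided since it is Drill-specific):

    // Sketch: read up to targetRecordCount lines into the current batch.
    int recordCount = 0;
    while (recordCount < targetRecordCount && reader.next(key, value)) {
      // split `value` on the delimiter and write the numCols projected
      // columns into the output vectors here (Drill-specific, elided)
      recordCount++;
    }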

Examples of org.apache.hadoop.mapred.TextInputFormat

    this.split = new FileSplit( new Path( file ), offset, length, defaultConf);

    this.hasMore = true;
    this.jobConf = defaultConf;
    //this.split = split;
    this.input_format = new TextInputFormat();

    try {
      this.reader = input_format.getRecordReader(this.split, this.jobConf, voidReporter);
    } catch (IOException e) {
      throw new RuntimeException(e); // fail fast rather than leave the auto-generated stub
View Full Code Here

Examples of org.apache.hadoop.mapred.TextInputFormat

    // ---- set where we'll read the input files from -------------
    FileInputFormat.setInputPaths(job, input_path);

    // try splitting the file in a variety of sizes
    TextInputFormat format = new TextInputFormat();
    format.configure(job);

    int numSplits = 1;

    InputSplit[] splits = null;

    try {
      splits = format.getSplits(job, numSplits);
    } catch (IOException e) {
      throw new RuntimeException(e); // fail fast instead of continuing with null splits
    }
View Full Code Here
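
The comment above promises "a variety of sizes" but the snippet stops after a single value. A hedged sketch of that sweep (the split counts are illustrative; assumes the enclosing method declares throws IOException):

    // Sketch: request several split counts and sanity-check total coverage.
    for (int n : new int[] {1, 2, 5, 10}) {
      InputSplit[] s = format.getSplits(job, n);
      long totalBytes = 0;
      for (InputSplit split : s) {
        totalBytes += split.getLength();
      }
      System.out.println(n + " requested -> " + s.length + " splits, " + totalBytes + " bytes");
    }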