Examples of TextInputFormat


Examples of org.apache.hadoop.mapreduce.lib.input.TextInputFormat

  @Override
  protected boolean runJob(Job job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
   
    // retrieve the splits
    TextInputFormat input = new TextInputFormat();
    List<InputSplit> splits = input.getSplits(job);
   
    int nbSplits = splits.size();
    log.debug("Nb splits : " + nbSplits);

    InputSplit[] sorted = new InputSplit[nbSplits];
    splits.toArray(sorted);
    Builder.sortSplits(sorted);

    int numTrees = Builder.getNbTrees(conf); // total number of trees

    TaskAttemptContext task = new TaskAttemptContext(conf, new TaskAttemptID());

    firstOutput = new MockContext(new Step1Mapper(), conf, task.getTaskAttemptID(), numTrees);

    firstIds = new int[nbSplits];
    sizes = new int[nbSplits];
   
    // to compute firstIds, process the splits in file order
    long slowest = 0; // duration of slowest map
    int firstId = 0;
    for (int p = 0; p < nbSplits; p++) {
      InputSplit split = splits.get(p);
      int hp = ArrayUtils.indexOf(sorted, split); // hadoop's partition
     
      RecordReader<LongWritable, Text> reader = input.createRecordReader(split, task);
      reader.initialize(split, task);
     
      Step1Mapper mapper = new MockStep1Mapper(getTreeBuilder(), dataset, getSeed(),
          hp, nbSplits, numTrees);
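The excerpt above stops before the records are actually consumed. For reference, here is a minimal, self-contained sketch of the same getSplits/createRecordReader pattern, assuming the 0.20-era org.apache.hadoop.mapreduce API used throughout these excerpts (newer Hadoop versions replace the TaskAttemptContext constructor with TaskAttemptContextImpl):

import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class TextInputFormatDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf);
    FileInputFormat.setInputPaths(job, new Path(args[0]));

    // compute the splits exactly as the excerpt above does
    TextInputFormat input = new TextInputFormat();
    List<InputSplit> splits = input.getSplits(job);

    TaskAttemptContext task = new TaskAttemptContext(conf, new TaskAttemptID());
    for (InputSplit split : splits) {
      // each record is one line of text: byte offset -> line contents
      RecordReader<LongWritable, Text> reader = input.createRecordReader(split, task);
      reader.initialize(split, task);
      while (reader.nextKeyValue()) {
        System.out.println(reader.getCurrentKey() + "\t" + reader.getCurrentValue());
      }
      reader.close();
    }
  }
}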

Examples of org.apache.hadoop.mapreduce.lib.input.TextInputFormat

  protected void secondStep(Configuration conf, Path forestPath, PredictionCallback callback)
      throws IOException, InterruptedException {
    JobContext jobContext = new JobContext(conf, new JobID());
   
    // retrieve the splits
    TextInputFormat input = new TextInputFormat();
    List<InputSplit> splits = input.getSplits(jobContext);
   
    int nbSplits = splits.size();
    log.debug("Nb splits : " + nbSplits);

    InputSplit[] sorted = new InputSplit[nbSplits];
    splits.toArray(sorted);
    Builder.sortSplits(sorted);

    int numTrees = Builder.getNbTrees(conf); // total number of trees

    // compute the expected number of outputs
    int total = 0;
    for (int p = 0; p < nbSplits; p++) {
      total += Step2Mapper.nbConcerned(nbSplits, numTrees, p);
    }

    TaskAttemptContext task = new TaskAttemptContext(conf, new TaskAttemptID());

    secondOutput = new MockContext(new Step2Mapper(), conf, task.getTaskAttemptID(), numTrees);
    long slowest = 0; // duration of slowest map

    for (int partition = 0; partition < nbSplits; partition++) {
     
      InputSplit split = sorted[partition];
      RecordReader<LongWritable, Text> reader = input.createRecordReader(split, task);

      // load the output of the 1st step
      int nbConcerned = Step2Mapper.nbConcerned(nbSplits, numTrees, partition);
      TreeID[] fsKeys = new TreeID[nbConcerned];
      Node[] fsTrees = new Node[nbConcerned];
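Both excerpts above call Builder.sortSplits to reproduce Hadoop's partition numbering before iterating over the splits. A hypothetical stand-in, assuming the usual ordering by descending split length (an assumption, not Mahout's actual source):

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.hadoop.mapreduce.InputSplit;

  // Hypothetical stand-in for Builder.sortSplits: order splits by descending
  // length so that partition numbers match Hadoop's scheduling order.
  static void sortSplits(InputSplit[] splits) {
    Arrays.sort(splits, new Comparator<InputSplit>() {
      @Override
      public int compare(InputSplit a, InputSplit b) {
        try {
          return Long.compare(b.getLength(), a.getLength());
        } catch (IOException e) {
          throw new IllegalStateException(e);
        } catch (InterruptedException e) {
          throw new IllegalStateException(e);
        }
      }
    });
  }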

Examples of org.apache.hadoop.mapreduce.lib.input.TextInputFormat

    FileInputFormat.setInputPaths(job, dataPath);

    setMaxSplitSize(job.getConfiguration(), dataPath, numMaps);

    // retrieve the splits
    TextInputFormat input = new TextInputFormat();
    List<InputSplit> splits = input.getSplits(job);
    assertEquals(numMaps, splits.size());

    InputSplit[] sorted = new InputSplit[numMaps];
    splits.toArray(sorted);
    Builder.sortSplits(sorted);

    context = new Step0Context(new Step0Mapper(), job.getConfiguration(),
        new TaskAttemptID(), numMaps);

    for (int p = 0; p < numMaps; p++) {
      InputSplit split = sorted[p];

      RecordReader<LongWritable, Text> reader = input.createRecordReader(split,
          context);
      reader.initialize(split, context);

      Step0Mapper mapper = new Step0Mapper();
      mapper.configure(p);
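This test pins the number of splits by capping the maximum split size before calling getSplits, which is what the assertEquals is checking. A sketch of what such a setMaxSplitSize helper can look like (an assumption, not the test's actual source; the property name is the 0.20-era one, renamed to mapreduce.input.fileinputformat.split.maxsize in Hadoop 2.x):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

  // Hypothetical helper matching the call above: cap the split size so the
  // input file is cut into roughly numMaps splits.
  static void setMaxSplitSize(Configuration conf, Path path, int numMaps) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    long fileSize = fs.getFileStatus(path).getLen();
    conf.setLong("mapred.max.split.size", fileSize / numMaps);
  }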

Examples of org.apache.hadoop.mapreduce.lib.input.TextInputFormat

    FileInputFormat.setInputPaths(job, dataPath);

    setMaxSplitSize(job.getConfiguration(), dataPath, numMaps);

    // retrieve the splits
    TextInputFormat input = new TextInputFormat();
    List<InputSplit> splits = input.getSplits(job);
    assertEquals(numMaps, splits.size());

    InputSplit[] sorted = new InputSplit[numMaps];
    splits.toArray(sorted);
    Builder.sortSplits(sorted);

    List<Integer> keys = new ArrayList<Integer>();
    List<Step0Output> values = new ArrayList<Step0Output>();

    int[] expectedIds = new int[numMaps];

    TaskAttemptContext context = new TaskAttemptContext(job.getConfiguration(),
        new TaskAttemptID());

    for (int p = 0; p < numMaps; p++) {
      InputSplit split = sorted[p];
      RecordReader<LongWritable, Text> reader = input.createRecordReader(split,
          context);
      reader.initialize(split, context);

      Long firstKey = null;
      int size = 0;
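The excerpt ends just before the read loop, but the firstKey and size variables suggest what follows: record the first key seen in each split and count its records. A hedged guess at the shape of the elided loop body:

      // Assumed continuation of the loop above: remember the first byte
      // offset in the split and count its records (lines).
      while (reader.nextKeyValue()) {
        if (firstKey == null) {
          firstKey = reader.getCurrentKey().get();
        }
        size++;
      }
      reader.close();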

Examples of org.apache.hadoop.mapreduce.lib.input.TextInputFormat

            } else {
                result = new PigAvroInputFormat(
                        inputAvroSchema, ignoreBadFiles, schemaToMergedSchemaMap);
            }
        } else {
            result = new TextInputFormat();
        }
        return result;
    }
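This last snippet begins mid-method; in context it is an input-format chooser in the style of Pig's AvroStorage.getInputFormat(), which falls back to TextInputFormat when no Avro schema is available. A simplified reconstruction under that assumption (the inner branch visible above is collapsed):

    // Simplified sketch of the surrounding method (an assumption based on the
    // visible branches): use the Avro reader when a schema is known, otherwise
    // fall back to plain line-oriented text input.
    @Override
    public InputFormat getInputFormat() throws IOException {
        InputFormat result;
        if (inputAvroSchema != null) {
            result = new PigAvroInputFormat(
                    inputAvroSchema, ignoreBadFiles, schemaToMergedSchemaMap);
        } else {
            result = new TextInputFormat();
        }
        return result;
    }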