Package org.apache.hadoop.mapred

Examples of org.apache.hadoop.mapred.TextInputFormat$LineRecordReader$TextStuffer


    // ---- set where we'll read the input files from -------------
    FileInputFormat.setInputPaths(job, input_path);

    // try splitting the file in a variety of sizes
    TextInputFormat format = new TextInputFormat();
    format.configure(job);

    int numSplits = 1;

    InputSplit[] splits = null;

    try {
      splits = format.getSplits(job, numSplits);
    } catch (IOException e) {
      // splitting failed; report the error (splits stays null)
      e.printStackTrace();
    }
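
The excerpt stops after computing the splits. As a minimal sketch (not part of the original example), the splits would typically be consumed by asking the same format for a record reader and iterating it; every one of these calls also throws IOException and belongs inside the try block above:

    // sketch: read every line from every split computed above
    for (InputSplit split : splits) {
      RecordReader<LongWritable, Text> reader =
          format.getRecordReader(split, job, Reporter.NULL);
      LongWritable key = reader.createKey();
      Text value = reader.createValue();
      while (reader.next(key, value)) {
        System.out.println(key.get() + "\t" + value);   // byte offset and line text
      }
      reader.close();
    }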

  private static class TextRecordReaderWrapper
    extends CombineFileRecordReaderWrapper<LongWritable,Text> {
    // this constructor signature is required by CombineFileRecordReader
    public TextRecordReaderWrapper(CombineFileSplit split, Configuration conf,
      Reporter reporter, Integer idx) throws IOException {
      super(new TextInputFormat(), split, conf, reporter, idx);
    }
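
The wrapper above only exists to give CombineFileRecordReader a per-file constructor it can invoke reflectively. A minimal sketch of the enclosing input format (this is the pattern Hadoop's CombineTextInputFormat follows; the class name used here is hypothetical):

  // sketch: a CombineFileInputFormat that reads plain text files through the wrapper above
  public static class CombinedTextInputFormat
      extends CombineFileInputFormat<LongWritable, Text> {

    @Override
    public RecordReader<LongWritable, Text> getRecordReader(
        InputSplit split, JobConf conf, Reporter reporter) throws IOException {
      // one TextRecordReaderWrapper is created per file packed into the combined split
      return new CombineFileRecordReader<LongWritable, Text>(
          conf, (CombineFileSplit) split, reporter, (Class) TextRecordReaderWrapper.class);
    }
  }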

      Collections.sort(columnIds);
      numCols = columnIds.size();
    }
    targetRecordCount = context.getConfig().getInt(ExecConstants.TEXT_LINE_READER_BATCH_SIZE);

    TextInputFormat inputFormat = new TextInputFormat();
    JobConf job = new JobConf();
    job.setInt("io.file.buffer.size", context.getConfig().getInt(ExecConstants.TEXT_LINE_READER_BUFFER_SIZE));
    job.setInputFormat(inputFormat.getClass());
    try {
      reader = inputFormat.getRecordReader(split, job, Reporter.NULL);
      key = reader.createKey();
      value = reader.createValue();
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
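
The reader, key, and value set up here are consumed later by the batch-reading logic, which the excerpt does not show. As a rough sketch (not the actual Drill implementation; reader.next also throws IOException and would be wrapped the same way), a batch read bounded by targetRecordCount is simply a loop over the record reader:

    // sketch: drain at most targetRecordCount lines for one batch (illustrative only)
    int recordCount = 0;
    while (recordCount < targetRecordCount && reader.next(key, value)) {
      // `value` holds one line of text; copy it into the output batch here
      recordCount++;
    }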

    int numSubTasks   = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
    String dataInput = (args.length > 1 ? args[1] : "");
    String output    = (args.length > 2 ? args[2] : "");

    HadoopDataSource<LongWritable, Text> source = new HadoopDataSource<LongWritable, Text>(
        new TextInputFormat(), new JobConf(), "Input Lines");
    TextInputFormat.addInputPath(source.getJobConf(), new Path(dataInput));


    MapOperator mapper = MapOperator.builder(new TokenizeLine())
        .input(source)

    int numSubTasks   = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
    String dataInput = (args.length > 1 ? args[1] : "");
    String output    = (args.length > 2 ? args[2] : "");
   
   
    HadoopDataSource source = new HadoopDataSource(new TextInputFormat(), new JobConf(), "Input Lines");
    TextInputFormat.addInputPath(source.getJobConf(), new Path(dataInput));
   
    // Example with Wrapper Converter
    HadoopDataSource<LongWritable,Text> sourceHadoopType = new HadoopDataSource<LongWritable, Text>(
        new TextInputFormat(), new JobConf(), "Input Lines", new WritableWrapperConverter<LongWritable, Text>());
    TextInputFormat.addInputPath(sourceHadoopType.getJobConf(), new Path(dataInput));
   
    MapOperator mapper = MapOperator.builder(new TokenizeLine())
      .input(source)
      .name("Tokenize Lines")

    // Make TextInputFormat recognize .ssf via config
    JobConf job = new JobConf();
    job.set("fs.default.name", "file:///");
    // System.out.println("FS DEFAULT: " + job.get("fs.default.name"));
    job.set("io.compression.codecs", SimpleSeekableFormatCodec.class.getName());
    TextInputFormat textInputFormat = new TextInputFormat();
    textInputFormat.configure(job);

    // Open the file using TextInputFormat
    TextInputFormat.addInputPath(job, path);
    InputSplit[] splits = textInputFormat.getSplits(job, 1);
    Assert.assertEquals(1, splits.length);
    RecordReader<LongWritable, Text> recordReader = textInputFormat.getRecordReader(splits[0],
        job, Reporter.NULL);

    // Verify the data
    LongWritable key = recordReader.createKey();
    Text value = recordReader.createValue();
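
The excerpt ends right before the records are pulled out. A minimal sketch of the verification loop (the assertion below is a placeholder; the real test checks the actual contents of the .ssf file):

    // sketch: iterate the decompressed lines read through TextInputFormat
    int lineCount = 0;
    while (recordReader.next(key, value)) {
      lineCount++;
    }
    recordReader.close();
    Assert.assertTrue("expected at least one line", lineCount > 0);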