Package org.apache.hadoop.mapred

Examples of org.apache.hadoop.mapred.TextInputFormat$LineRecordReader$TextStuffer
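
All of these snippets use the old "mapred" TextInputFormat, which reads plain text files and hands each line to the job as a (LongWritable byte offset, Text line) pair. The page indexes the inner helper class TextInputFormat$LineRecordReader$TextStuffer, but every example below exercises the enclosing TextInputFormat itself, and they all share the same shape: construct the format, call configure(job), then ask it for splits and record readers. As a point of reference, here is a minimal self-contained sketch of that pattern (the input path is a placeholder):

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

public class TextInputFormatDemo {
  public static void main(String[] args) throws IOException {
    JobConf job = new JobConf();
    FileInputFormat.setInputPaths(job, new Path("/tmp/input.txt")); // placeholder path

    TextInputFormat format = new TextInputFormat();
    format.configure(job); // lets the format discover compression codecs etc.

    for (InputSplit split : format.getSplits(job, 1)) {
      RecordReader<LongWritable, Text> reader =
          format.getRecordReader(split, job, Reporter.NULL);
      LongWritable key = reader.createKey(); // byte offset of the line
      Text value = reader.createValue();     // the line itself
      try {
        while (reader.next(key, value)) {
          System.out.println(key.get() + "\t" + value);
        }
      } finally {
        reader.close();
      }
    }
  }
}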


  // getRecordReader() for a symlink-aware text format (note the Hive helper
  // classes): resolve the symlink split to its target, then delegate the
  // actual reading to a plain TextInputFormat.
  public RecordReader<LongWritable, Text> getRecordReader(
      InputSplit split, JobConf job, Reporter reporter) throws IOException {
    InputSplit targetSplit = ((SymlinkTextInputSplit)split).getTargetSplit();

    // The target data is in TextInputFormat.
    TextInputFormat inputFormat = new TextInputFormat();
    inputFormat.configure(job);
    RecordReader<LongWritable, Text> innerReader = null;
    try {
      innerReader = inputFormat.getRecordReader(targetSplit, job,
          reporter);
    } catch (Exception e) {
      // Give Hive's configured I/O exception handlers a chance to recover.
      innerReader = HiveIOExceptionHandlerUtil
          .handleRecordReaderCreationException(e, job);
    }
    return innerReader;
  }
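
As used here, HiveIOExceptionHandlerUtil gives the handlers Hive registers via the hive.io.exception.handlers setting a chance to swallow the failure and supply a substitute reader; if none of them handles it, the original exception is rethrown.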


    // getSplits() excerpt from the same symlink-aware format: compute splits
    // for each symlink's target file, then wrap them as SymlinkTextInputSplits.
    if (targetPaths.size() == 0) {
      return new InputSplit[0];
    }

    // The input should be in TextInputFormat.
    TextInputFormat inputFormat = new TextInputFormat();
    JobConf newjob = new JobConf(job);
    newjob.setInputFormat(TextInputFormat.class);
    inputFormat.configure(newjob);

    List<InputSplit> result = new ArrayList<InputSplit>();

    // ceil(numSplits / numPaths), so we get at least numSplits splits overall
    // (e.g. numSplits = 10 over numPaths = 3 gives 4 sub-splits per path, 12 in total).
    int numPaths = targetPaths.size();
    int numSubSplits = (numSplits + numPaths - 1) / numPaths;

    // For each path, do getSplits(), tagging every resulting FileSplit with
    // the symlink it was reached through.
    for (int i = 0; i < numPaths; ++i) {
      Path targetPath = targetPaths.get(i);
      Path symlinkPath = symlinkPaths.get(i);

      FileInputFormat.setInputPaths(newjob, targetPath);

      InputSplit[] iss = inputFormat.getSplits(newjob, numSubSplits);
      for (InputSplit is : iss) {
        result.add(new SymlinkTextInputSplit(symlinkPath, (FileSplit)is));
      }
    }
    return result.toArray(new InputSplit[result.size()]);

      // Excerpt from a Drill-style text reader: sort the projected column ids,
      // then take the record-batch size from the execution config.
      Collections.sort(columnIds);
      numCols = columnIds.size();
    }
    targetRecordCount = context.getConfig().getInt(ExecConstants.TEXT_LINE_READER_BATCH_SIZE);

    // A standalone TextInputFormat configured by hand: the read buffer size
    // comes from the engine's own config rather than from cluster defaults.
    TextInputFormat inputFormat = new TextInputFormat();
    JobConf job = new JobConf();
    job.setInt("io.file.buffer.size", context.getConfig().getInt(ExecConstants.TEXT_LINE_READER_BUFFER_SIZE));
    job.setInputFormat(inputFormat.getClass());
    try {
      // Reporter.NULL: no task progress to report outside a live MapReduce task.
      reader = inputFormat.getRecordReader(split, job, Reporter.NULL);
      key = reader.createKey();
      value = reader.createValue();
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
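
The reader, key, and value built above would then be driven in batches. A minimal sketch of such a loop (assumed driver code, not Drill's actual implementation), reusing the objects created above:

    // Hypothetical batch loop: pull at most targetRecordCount lines per batch.
    int recordCount = 0;
    try {
      while (recordCount < targetRecordCount && reader.next(key, value)) {
        // key holds the line's byte offset, value the line's bytes
        recordCount++;
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    }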

// An input format that delegates the actual line reading to TextInputFormat
// but is typed to emit Text/Text key-value pairs instead of offset/line pairs.
public class KeyValuePreservingTextInputFormat extends FileInputFormat<Text, Text> {

  private TextInputFormat delegate;

  public void configure(JobConf conf) {
    // Create and configure the wrapped TextInputFormat when the framework
    // hands over the job configuration.
    delegate = new TextInputFormat();
    delegate.configure(conf);
  }
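
The excerpt shows only configure(); since FileInputFormat leaves getRecordReader() abstract, the class must also delegate record reading. A hedged sketch of what that delegation could look like, assuming the intent is to re-key each line at its first tab, KeyValueTextInputFormat-style (the wrapper below is illustrative, not the original source; the original's splitting policy may differ):

  // Hypothetical delegation: wrap the delegate's reader and split each line
  // at the first tab into a <key, value> pair of Text.
  public RecordReader<Text, Text> getRecordReader(InputSplit split,
      JobConf job, Reporter reporter) throws IOException {
    final RecordReader<LongWritable, Text> inner =
        delegate.getRecordReader(split, job, reporter);
    final LongWritable offset = inner.createKey();
    final Text line = inner.createValue();
    return new RecordReader<Text, Text>() {
      public Text createKey() { return new Text(); }
      public Text createValue() { return new Text(); }
      public boolean next(Text key, Text value) throws IOException {
        if (!inner.next(offset, line)) {
          return false;
        }
        String s = line.toString();
        int tab = s.indexOf('\t');
        key.set(tab < 0 ? s : s.substring(0, tab));
        value.set(tab < 0 ? "" : s.substring(tab + 1));
        return true;
      }
      public long getPos() throws IOException { return inner.getPos(); }
      public void close() throws IOException { inner.close(); }
      public float getProgress() throws IOException { return inner.getProgress(); }
    };
  }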

    int numSubTasks   = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
    String dataInput = (args.length > 1 ? args[1] : "");
    String output    = (args.length > 2 ? args[2] : "");
   
   
    HadoopDataSource source = new HadoopDataSource(new TextInputFormat(), new JobConf(), "Input Lines");
    TextInputFormat.addInputPath(source.getJobConf(), new Path(dataInput));
   
    // Example with a wrapper converter: the same input, but the converter
    // wraps the Hadoop Writables for the downstream operators.
    HadoopDataSource<LongWritable,Text> sourceHadoopType = new HadoopDataSource<LongWritable, Text>(
        new TextInputFormat(), new JobConf(), "Input Lines", new WritableWrapperConverter<LongWritable, Text>());
    TextInputFormat.addInputPath(sourceHadoopType.getJobConf(), new Path(dataInput));
   
    MapOperator mapper = MapOperator.builder(new TokenizeLine())
      .input(source)
      .name("Tokenize Lines")

    final String outputPath = args[1];
   
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    // Set up the Hadoop Input Format
    HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, new JobConf());
    TextInputFormat.addInputPath(hadoopInputFormat.getJobConf(), new Path(inputPath));
   
    // Read it into a Flink DataSet of (byte offset, line) tuples
    DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);
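
From here, text behaves like any other DataSet. A small sketch (assumed continuation, needing org.apache.flink.api.common.functions.MapFunction) that drops the offsets (f0) and keeps the lines (f1); depending on the Flink version, print() either triggers execution itself or requires a following env.execute():

    DataSet<String> lines = text.map(
        new MapFunction<Tuple2<LongWritable, Text>, String>() {
          @Override
          public String map(Tuple2<LongWritable, Text> record) {
            return record.f1.toString(); // f1 is the line, f0 its byte offset
          }
        });
    lines.print();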
   

    int numSubTasks   = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
    String dataInput = (args.length > 1 ? args[1] : "");
    String output    = (args.length > 2 ? args[2] : "");

    HadoopDataSource<LongWritable, Text> source = new HadoopDataSource<LongWritable, Text>(
        new TextInputFormat(), new JobConf(), "Input Lines");
    TextInputFormat.addInputPath(source.getJobConf(), new Path(dataInput));


    MapOperator mapper = MapOperator.builder(new TokenizeLine())
        .input(source)

    // ---- set where we'll read the input files from -------------
    FileInputFormat.setInputPaths(job, input_path);

    // try splitting the file in a variety of sizes
    TextInputFormat format = new TextInputFormat();
    format.configure(job);

    int numSplits = 1;

    InputSplit[] splits = null;

    try {
      splits = format.getSplits(job, numSplits);
    } catch (IOException e) {
      // Surface the failure instead of continuing with splits == null.
      throw new RuntimeException("getSplits() failed", e);
    }
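
A short sketch of inspecting whatever getSplits() produced (assumed follow-up code, not part of the original; needs java.util.Arrays, and note that getLength() and getLocations() themselves declare IOException):

    for (InputSplit split : splits) {
      try {
        System.out.println(split.getLength() + " bytes on "
            + Arrays.toString(split.getLocations()));
      } catch (IOException e) {
        throw new RuntimeException("failed to inspect split", e);
      }
    }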


