Package com.cloudera.iterativereduce.io

Source Code of com.cloudera.iterativereduce.io.TextRecordParser

package com.cloudera.iterativereduce.io;

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

import com.cloudera.iterativereduce.Updateable;

/**
* A basic wrapper that uses hadoop's native classes for record reading
*
* - read: this is a cheap hack to save time. but it works
*
*
* @param <T>
*/
public class TextRecordParser<T extends Updateable> implements RecordParser<T> {

  TextInputFormat input_format = null;
  InputSplit split = null;
  JobConf jobConf = null;
 
  RecordReader<LongWritable, Text> reader = null;
  LongWritable key = null;
 
  final Reporter voidReporter = Reporter.NULL;
  boolean hasMore = true;
 
  /*
   * a hack to get this setup
   */
/*  public void setup(JobConf jobConf, InputSplit split) throws IOException {
   
    this.jobConf = jobConf;
    this.split = split;
    this.input_format = new TextInputFormat();

    this.reader = input_format.getRecordReader(this.split, this.jobConf, voidReporter);
    this.key = reader.createKey();
   
   
  }
*/ 
  /**
   *
   * just a dead simple way to do this
   *
   * - functionality from TestTextInputFormat::readSplit()
   *
   * If returns true, then csv_line contains the next line
   * If returns false, then there is no next record
   *
   * Will terminate when it hits the end of the split based on the information provided in the split class
   * to the constructor and the TextInputFormat
   *
   * @param csv_line
   * @throws IOException
   */
  public boolean next(Text csv_line) throws IOException {
   
    hasMore = reader.next(key, csv_line);
    return hasMore;
   
  }
 

 
  @Override
  public int getCurrentRecordsProcessed() {
    // TODO Auto-generated method stub
    return 0;
  }

  @Override
  public boolean hasMoreRecords() {
   
  //  return hasMore;
    try {
    if ( this.reader.getProgress() < 1.0 ) {
        return true;
      }
  } catch (IOException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
  }
   
    return false;
  }

  @Override
  public T nextRecord() {
    // TODO Auto-generated method stub
    return null;
  }

  /**
   * use this to setup the input format
   */
  @Override
  public void parse() {
    // TODO Auto-generated method stub
   
  }

  @Override
  public void reset() {
    // TODO Auto-generated method stub
    try {
      this.hasMore = true;
      this.reader = input_format.getRecordReader(this.split, this.jobConf, voidReporter);
    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
  }

  @Override
  public void setFile(String file, long offset, long length) {
    JobConf defaultConf = new JobConf();
    this.split = new FileSplit( new Path( file ), offset, length, defaultConf);

    this.hasMore = true;
    this.jobConf = defaultConf;
    //this.split = split;
    this.input_format = new TextInputFormat();

    try {
      this.reader = input_format.getRecordReader(this.split, this.jobConf, voidReporter);
    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
    this.key = reader.createKey();
   
  }

  @Override
  public void setFile(String file) {
    // TODO Auto-generated method stub
   
  }

}
TOP

Related Classes of com.cloudera.iterativereduce.io.TextRecordParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.