Source Code of com.cloudera.iterativereduce.io.TextRecordParser

package com.cloudera.iterativereduce.io;


import java.io.IOException;


import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;


import com.cloudera.iterativereduce.Updateable;


/**
 * A basic wrapper that uses hadoop's native classes for record reading
 * 
 * - read: this is a cheap hack to save time. but it works
 * 
 *
 * @param <T>
 */
public class TextRecordParser<T extends Updateable> implements RecordParser<T> {


  TextInputFormat input_format = null;
  InputSplit split = null;
  JobConf jobConf = null;
  
  RecordReader<LongWritable, Text> reader = null;
  LongWritable key = null;
  
  final Reporter voidReporter = Reporter.NULL;
  boolean hasMore = true;
  
  /*
   * a hack to get this setup
   */
/*  public void setup(JobConf jobConf, InputSplit split) throws IOException {
    
    this.jobConf = jobConf;
    this.split = split;
    this.input_format = new TextInputFormat();


    this.reader = input_format.getRecordReader(this.split, this.jobConf, voidReporter);
    this.key = reader.createKey();
    
    
  }
*/  
  /**
   * 
   * just a dead simple way to do this
   *
   * - functionality from TestTextInputFormat::readSplit()
   * 
   * If returns true, then csv_line contains the next line
   * If returns false, then there is no next record
   * 
   * Will terminate when it hits the end of the split based on the information provided in the split class
   * to the constructor and the TextInputFormat
   * 
   * @param csv_line
   * @throws IOException 
   */
  public boolean next(Text csv_line) throws IOException {
    
    hasMore = reader.next(key, csv_line);
    return hasMore;
    
  }
  


  
  @Override
  public int getCurrentRecordsProcessed() {
    // TODO Auto-generated method stub
    return 0;
  }


  @Override
  public boolean hasMoreRecords() {
    
  //  return hasMore;
    try {
    if ( this.reader.getProgress() < 1.0 ) {
        return true;
      }
  } catch (IOException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
  }
    
    return false;
  }


  @Override
  public T nextRecord() {
    // TODO Auto-generated method stub
    return null;
  }


  /**
   * use this to setup the input format
   */
  @Override
  public void parse() {
    // TODO Auto-generated method stub
    
  }


  @Override
  public void reset() {
    // TODO Auto-generated method stub
    try {
      this.hasMore = true;
      this.reader = input_format.getRecordReader(this.split, this.jobConf, voidReporter);
    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
  }


  @Override
  public void setFile(String file, long offset, long length) {
    JobConf defaultConf = new JobConf();
    this.split = new FileSplit( new Path( file ), offset, length, defaultConf); 


    this.hasMore = true;
    this.jobConf = defaultConf;
    //this.split = split;
    this.input_format = new TextInputFormat();


    try {
      this.reader = input_format.getRecordReader(this.split, this.jobConf, voidReporter);
    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
    this.key = reader.createKey();
    
  }


  @Override
  public void setFile(String file) {
    // TODO Auto-generated method stub
    
  }


}
Source Code of com.cloudera.iterativereduce.io.TextRecordParser

Related Classes of com.cloudera.iterativereduce.io.TextRecordParser