package com.twitter.elephantbird.pig.load;

import java.io.IOException;

import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
import org.apache.thrift.TBase;
import org.apache.thrift.TException;

import com.twitter.elephantbird.mapreduce.input.RCFileThriftTupleInputFormat;
import com.twitter.elephantbird.util.HadoopCompat;
import com.twitter.elephantbird.util.RCFileUtil;

/**
 * Pig loader for Thrift objects stored in RCFiles.
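 * <p>
 * A minimal usage sketch in Pig Latin. {@code com.example.thrift.Person} and
 * the input path are placeholders; substitute your own generated Thrift
 * class and data location:
 * <pre>
 * raw = LOAD '/data/people' USING
 *   com.twitter.elephantbird.pig.load.RCFileThriftPigLoader(
 *     'com.example.thrift.Person');
 * </pre>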
*/
public class RCFileThriftPigLoader extends ThriftPigLoader<TBase<?, ?>> {

  /** Reader that can decode RCFile rows directly into Pig tuples. */
  private RCFileThriftTupleInputFormat.TupleReader thriftReader;

  /**
   * @param thriftClassName fully qualified name of the Thrift class
   */
  public RCFileThriftPigLoader(String thriftClassName) {
    super(thriftClassName);
  }
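
  /** Returns an input format that reads Thrift rows from RCFiles as tuples. */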
  @Override
  @SuppressWarnings("unchecked")
  public InputFormat getInputFormat() throws IOException {
    return new RCFileThriftTupleInputFormat(typeRef);
  }
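
  /**
   * Returns the next row as a Pig tuple. When the required fields include
   * the "unknowns" column, the loader falls back to the regular
   * bytes -> Thrift -> tuple path; otherwise the tuple is assembled
   * directly from the stored column bytes.
   */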
  @Override
  public Tuple getNext() throws IOException {
    // Note: isReadingUnknonwsColumn() is spelled this way in the reader's API.
    if (thriftReader.isReadingUnknonwsColumn()) {
      // Unknown fields require full deserialization:
      // bytes -> Thrift object -> tuple.
      return super.getNext();
    }
    // Otherwise the reader builds the tuple straight from the column bytes:
    // bytes -> tuple.
    try {
      if (thriftReader.nextKeyValue()) {
        return thriftReader.getCurrentTupleValue();
      }
    } catch (TException e) {
      throw new IOException(e);
    } catch (InterruptedException e) {
      throw new IOException(e);
    }
    return null; // no more rows in this split
  }
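
  /**
   * Saves a typed reference to the reader so {@link #getNext()} can fetch
   * tuples from it directly.
   */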
  @Override
  @SuppressWarnings("unchecked")
  public void prepareToRead(RecordReader reader, PigSplit split) {
    super.prepareToRead(reader, split);
    thriftReader = (RCFileThriftTupleInputFormat.TupleReader) reader;
  }
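
  /**
   * Stores the fields required by the Pig script in the job configuration
   * so that only the matching RCFile columns are read.
   */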
  @Override
  public void setLocation(String location, Job job) throws IOException {
    super.setLocation(location, job);
    RCFileUtil.setRequiredFieldConf(HadoopCompat.getConfiguration(job),
                                    requiredFieldList);
  }
}