package com.twitter.elephantbird.pig.load;
import java.io.IOException;
import com.twitter.elephantbird.mapreduce.input.RCFileProtobufTupleInputFormat;
import com.twitter.elephantbird.util.HadoopCompat;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
import org.apache.pig.data.Tuple;
import com.google.protobuf.Message;
import com.twitter.elephantbird.util.RCFileUtil;
/**
 * Pig loader for Protobufs stored in RCFiles.
 * <p>
 * Delegates record reading to {@link RCFileProtobufTupleInputFormat} so that,
 * when the reader is only scanning known columns, Pig tuples can be built
 * directly from the serialized column bytes without materializing an
 * intermediate protobuf {@link Message}.
 */
public class RCFileProtobufPigLoader extends ProtobufPigLoader<Message> {

  // Set in prepareToRead(); used by getNext() for the direct bytes-to-tuple path.
  private RCFileProtobufTupleInputFormat.TupleReader protoReader;

  /**
   * @param protoClassName fully qualified name of the protobuf class
   */
  public RCFileProtobufPigLoader(String protoClassName) {
    super(protoClassName);
  }

  @Override @SuppressWarnings("unchecked")
  public InputFormat getInputFormat() throws IOException {
    return new RCFileProtobufTupleInputFormat(typeRef);
  }

  /**
   * Returns the next tuple, or {@code null} at end of input.
   * <p>
   * When the reader is consuming the "unknowns" column, falls back to the
   * superclass path (bytes -&gt; protobuf message -&gt; tuple); otherwise
   * converts the column bytes straight to a tuple.
   *
   * @throws IOException if reading fails or the reader thread is interrupted
   */
  @Override
  public Tuple getNext() throws IOException {
    if (protoReader.isReadingUnknonwsColumn()) {
      // Unknown fields require full deserialization:
      // bytes -> protobuf message -> tuple.
      return super.getNext();
    }
    // Fast path: bytes -> tuple, skipping message construction.
    try {
      if (protoReader.nextKeyValue()) {
        return protoReader.getCurrentTupleValue();
      }
    } catch (InterruptedException e) {
      // Restore the interrupt status before translating the exception, so
      // callers up the stack can still observe that the thread was interrupted.
      Thread.currentThread().interrupt();
      throw new IOException(e);
    }
    return null; // end of input
  }

  @Override @SuppressWarnings("unchecked")
  public void prepareToRead(RecordReader reader, PigSplit split) {
    super.prepareToRead(reader, split);
    // Keep a typed reference for the direct tuple path in getNext().
    protoReader = (RCFileProtobufTupleInputFormat.TupleReader) reader;
  }

  @Override
  public void setLocation(String location, Job job) throws IOException {
    super.setLocation(location, job);
    // Propagate Pig's projected (required) fields into the job conf so the
    // RCFile reader can skip deserializing unneeded columns.
    RCFileUtil.setRequiredFieldConf(HadoopCompat.getConfiguration(job),
                                    requiredFieldList);
  }
}