Package com.twitter.elephantbird.pig.load

Source Code of com.twitter.elephantbird.pig.load.RCFileProtobufPigLoader

package com.twitter.elephantbird.pig.load;

import java.io.IOException;

import com.twitter.elephantbird.mapreduce.input.RCFileProtobufTupleInputFormat;
import com.twitter.elephantbird.util.HadoopCompat;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
import org.apache.pig.data.Tuple;

import com.google.protobuf.Message;
import com.twitter.elephantbird.util.RCFileUtil;

/**
* Pig loader for Protobufs stored in RCFiles.
*/
public class RCFileProtobufPigLoader extends ProtobufPigLoader<Message> {

  private RCFileProtobufTupleInputFormat.TupleReader protoReader;

  /**
   * @param protoClassName fully qualified name of the protobuf class
   */
  public RCFileProtobufPigLoader(String protoClassName) {
    super(protoClassName);
  }

  @Override @SuppressWarnings("unchecked")
  public InputFormat getInputFormat() throws IOException {
    return new RCFileProtobufTupleInputFormat(typeRef);
  }

  @Override
  public Tuple getNext() throws IOException {
    if (protoReader.isReadingUnknonwsColumn()) {
      //do normal bytes -> protobuf message -> tuple
      return super.getNext();
    }

    // otherwise bytes -> tuple
    try {
      if (protoReader.nextKeyValue()) {
        return protoReader.getCurrentTupleValue();
      }
    } catch (InterruptedException e) {
      throw new IOException(e);
    }

    return null;
  }

  @Override @SuppressWarnings("unchecked")
  public void prepareToRead(RecordReader reader, PigSplit split) {
    super.prepareToRead(reader, split);
    protoReader = (RCFileProtobufTupleInputFormat.TupleReader) reader;
  }

  @Override
  public void setLocation(String location, Job job) throws IOException {
    super.setLocation(location, job);
    RCFileUtil.setRequiredFieldConf(HadoopCompat.getConfiguration(job),
                                    requiredFieldList);
  }

}
TOP

Related Classes of com.twitter.elephantbird.pig.load.RCFileProtobufPigLoader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.