Package com.twitter.elephantbird.pig.load

Source Code of com.twitter.elephantbird.pig.load.RCFileThriftPigLoader

package com.twitter.elephantbird.pig.load;

import java.io.IOException;

import com.twitter.elephantbird.mapreduce.input.RCFileThriftTupleInputFormat;
import com.twitter.elephantbird.util.HadoopCompat;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
import org.apache.pig.data.Tuple;
import org.apache.thrift.TBase;
import org.apache.thrift.TException;

import com.twitter.elephantbird.util.RCFileUtil;

/**
* Pig loader for Thrift object stored in RCFiles.
*/
public class RCFileThriftPigLoader extends ThriftPigLoader<TBase<?,?>> {

  private RCFileThriftTupleInputFormat.TupleReader thriftReader;

  /**
   * @param thriftClassName fully qualified name of the thrift class
   */
  public RCFileThriftPigLoader(String thriftClassName) {
    super(thriftClassName);
  }

  @Override @SuppressWarnings("unchecked")
  public InputFormat getInputFormat() throws IOException {
    return new RCFileThriftTupleInputFormat(typeRef);
  }

  @Override
  public Tuple getNext() throws IOException {
    if (thriftReader.isReadingUnknonwsColumn()) {
      //do normal bytes -> thrift -> tuple
      return super.getNext();
    }

    // otherwise bytes -> tuple
    try {
      if (thriftReader.nextKeyValue()) {
        return thriftReader.getCurrentTupleValue();
      }
    } catch (TException e) {
      throw new IOException(e);
    } catch (InterruptedException e) {
      throw new IOException(e);
    }

    return null;
  }

  @Override @SuppressWarnings("unchecked")
  public void prepareToRead(RecordReader reader, PigSplit split) {
    super.prepareToRead(reader, split);
    thriftReader = (RCFileThriftTupleInputFormat.TupleReader) reader;
  }

  @Override
  public void setLocation(String location, Job job) throws IOException {
    super.setLocation(location, job);
    RCFileUtil.setRequiredFieldConf(HadoopCompat.getConfiguration(job),
                                    requiredFieldList);
  }

}
TOP

Related Classes of com.twitter.elephantbird.pig.load.RCFileThriftPigLoader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.