Package parkour.hadoop

Source Code of parkour.hadoop.AvroKeyGroupingComparator

package parkour.hadoop;

import org.apache.avro.Schema;
import org.apache.avro.Schema.Parser;
import org.apache.avro.generic.GenericData;
import org.apache.avro.hadoop.io.AvroSerialization;
import org.apache.avro.io.BinaryData;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.mapreduce.Job;

public class AvroKeyGroupingComparator<T>
    extends Configured
    implements RawComparator<AvroKey<T>> {

  public static final String
    CONF_GROUPING_SCHEMA = "parkour.avro.grouping.schema";

  private GenericData mDataModel;
  private Schema mSchema;

  public static void setGroupingSchema(Job job, Schema schema) {
    job.setGroupingComparatorClass(AvroKeyGroupingComparator.class);
    job.getConfiguration().set(CONF_GROUPING_SCHEMA, schema.toString());
  }

  @Override
  public void setConf(Configuration conf) {
    super.setConf(conf);
    if (conf == null) return;
    mDataModel = AvroSerialization.createDataModel(conf);
    String schemaString = conf.get(CONF_GROUPING_SCHEMA);
    if (schemaString != null) mSchema = new Parser().parse(schemaString);
  }

  @Override
  public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
    return BinaryData.compare(b1, s1, b2, s2, mSchema);
  }

  @Override
  public int compare(AvroKey<T> x, AvroKey<T> y) {
    return mDataModel.compare(x.datum(), y.datum(), mSchema);
  }
}
TOP

Related Classes of parkour.hadoop.AvroKeyGroupingComparator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.