package com.ebay.erl.mobius.core.datajoin;
import java.io.DataInput;
import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.util.ReflectionUtils;
import com.ebay.erl.mobius.core.ConfigureConstants;
import com.ebay.erl.mobius.core.model.Tuple;
import com.ebay.erl.mobius.core.model.TupleColumnComparator;
import com.ebay.erl.mobius.core.sort.Sorter;
import com.ebay.erl.mobius.util.SerializableUtil;
import com.ebay.erl.mobius.util.Util;
/**
* <p>
* This product is licensed under the Apache License, Version 2.0,
* available at http://www.apache.org/licenses/LICENSE-2.0.
*
* This product contains portions derived from Apache hadoop which is
* licensed under the Apache License, Version 2.0, available at
* http://hadoop.apache.org.
*
* © 2007 – 2012 eBay Inc., Evan Chiu, Woody Zhou, Neel Sundaresan
*
*/
/**
 * Composite key of a data-join job: a {@link Tuple} with exactly two columns,
 * the user supplied join key ({@link #ACUTAL_KEY}) and the ID of the dataset
 * the record came from ({@link #DATASET_ID}).
 *
 * <p>
 * Ordering, both for deserialized instances ({@link #compareTo(Tuple)}) and
 * for the serialized form ({@link #compare(byte[], int, int, byte[], int, int)}),
 * is: join key first, dataset ID second, so that for an identical join key the
 * records of a lower dataset ID come before those of a higher one.
 *
 * <p>
 * The nested {@link Comparator} compares the join key only and ignores the
 * dataset ID.
 */
@SuppressWarnings("unchecked")
public class DataJoinKey extends Tuple {

    // add 2 digit prefix to ensure when deserialize it from byte arrays,
    // we preserve the order, because when {@link Tuple} serialize itself,
    // it iterates column names (in natural order) one by one to serialize
    // the values of the columns.

    /**
     * Column holding the user supplied join key. The misspelled identifier is
     * public API and is kept for backward compatibility.
     */
    public static String ACUTAL_KEY = "00_MOBIUS_KEY";

    /** Column holding the ID of the dataset the record belongs to. */
    public static String DATASET_ID = "01_MOBIUS_DATASETID";

    // Columns that used to be part of the key and have been removed:
    //public static String SORT_KEYWORD_FIELDNAME = "02_MOBIUS_SORT_KEYWORD";
    //public static String SORT_COMPARATOR_FIELDNAME = "03_MOBIUS_SORT_COMPARATOR";

    /** Shared empty result for "no sorters"; zero-length, so safe to share. */
    private static final Sorter[] NO_SORTERS = new Sorter[0];

    /**
     * Cache of the sorters decoded from the job configuration, shared by all
     * instances. Only populated from an instance that has a configuration.
     */
    private static Sorter[] _SORTERS;

    /** Column-level comparator used by both the object and the raw-bytes comparison. */
    private final TupleColumnComparator _COLUMN_COMPARATOR = new TupleColumnComparator();

    // to be called by Hadoop on org.apache.hadoop.mapred.JobConf.getOutputKeyComparator
    public DataJoinKey(){}

    /**
     * Creates a key for the given dataset and join key.
     *
     * @param datasetID ID of the dataset the record comes from, must not be <code>null</code>
     * @param key the user supplied join key
     */
    public DataJoinKey(Byte datasetID, WritableComparable<?> key)
    {
        set(datasetID, key, null, null);
    }

    /**
     * Creates a key for the given dataset and join key.
     *
     * @param datasetID ID of the dataset the record comes from, must not be <code>null</code>
     * @param key the user supplied join key
     * @param sortKeyword currently ignored, see {@link #set(Byte, WritableComparable, WritableComparable, Class)}
     * @param sortComparator currently ignored, see {@link #set(Byte, WritableComparable, WritableComparable, Class)}
     */
    public DataJoinKey(Byte datasetID, WritableComparable<?> key, WritableComparable<?> sortKeyword, Class<?> sortComparator)
    {
        set(datasetID, key, sortKeyword, sortComparator);
    }

    /**
     * Stores the join key and the dataset ID into this tuple.
     *
     * @param datasetID ID of the dataset the record comes from; it is unboxed,
     *                  so <code>null</code> results in a {@link NullPointerException}
     * @param key the user supplied join key
     * @param sortKeyword not stored, the corresponding column has been removed
     * @param sortComparator not stored, the corresponding column has been removed
     */
    public void set(Byte datasetID, WritableComparable<?> key, WritableComparable<?> sortKeyword, Class<?> sortComparator)
    {
        this.put(ACUTAL_KEY, key);
        this.put(DATASET_ID, datasetID.byteValue());
        //this.put(SORT_KEYWORD_FIELDNAME, sortKeyword==null?NullWritable.get():sortKeyword);
        //this.put(SORT_COMPARATOR_FIELDNAME, sortComparator==null?Class.class.getName():sortComparator.getName());
    }

    /**
     * Deserializes this key and then re-applies the fixed two-column schema.
     *
     * @param in the input to read the serialized key from
     * @throws IOException if the underlying tuple cannot be read
     */
    @Override
    public void readFields(DataInput in)
        throws IOException
    {
        super.readFields(in);

        // ordering matters
        this.setSchema(new String[]{ACUTAL_KEY, DATASET_ID/*, SORT_KEYWORD_FIELDNAME, SORT_COMPARATOR_FIELDNAME*/});
    }

    /**
     * @return the value stored in the {@link #ACUTAL_KEY} column
     */
    public WritableComparable getKey()
    {
        return (WritableComparable<?>)this.get(ACUTAL_KEY);
    }

    /**
     * @return the value stored in the {@link #DATASET_ID} column
     */
    public Byte getDatasetID()
    {
        return this.getByte(DATASET_ID);
    }

    /**
     * @return always <code>null</code>; the sort keyword column has been removed
     */
    public WritableComparable getSortKeyword()
    {
        //return (WritableComparable)this.get(SORT_KEYWORD_FIELDNAME);
        return null;
    }

    /**
     * @return always <code>null</code>; the sort comparator column has been removed
     */
    public Class getSortComparator()
    {
        //return Util.getClass(this.getString(SORT_COMPARATOR_FIELDNAME));
        return null;
    }

    /**
     * Returns the sorters configured under {@link ConfigureConstants#SORTERS},
     * decoding them on first use and caching the result for all instances.
     *
     * <p>
     * When this instance has no configuration, an empty array is returned
     * <em>without</em> touching the cache; otherwise an instance used before
     * being configured would permanently hide the configured sorters from
     * every instance created afterwards.
     *
     * @return the configured sorters, never <code>null</code>
     * @throws RuntimeException if the configured value cannot be decoded
     */
    private Sorter[] getSorter()
    {
        if( _SORTERS!=null )
        {
            return _SORTERS;
        }

        if( this.conf==null )
        {
            // nothing to decode from yet; leave the shared cache unset so a
            // configured instance can still populate it later.
            return NO_SORTERS;
        }

        String encodedSorters = this.conf.get(ConfigureConstants.SORTERS, "");
        if( encodedSorters.isEmpty() )
        {
            _SORTERS = NO_SORTERS;
        }
        else
        {
            try
            {
                _SORTERS = (Sorter[])SerializableUtil.deserializeFromBase64(encodedSorters, conf);
            }
            catch (IOException e)
            {
                throw new RuntimeException("Cannot deserialize sorters from :["+encodedSorters+"] using Base64 decoder.", e);
            }
        }
        return _SORTERS;
    }

    /**
     * Compares by join key first and, when the join keys are equal, by
     * dataset ID.
     *
     * @param other the tuple to compare with; its {@link #ACUTAL_KEY} and
     *              {@link #DATASET_ID} columns are read
     * @return negative, zero or positive as this key sorts before, equal to
     *         or after <code>other</code>
     */
    @Override
    public int compareTo(Tuple other)
    {
        WritableComparable<?> otherKey = (WritableComparable<?>)other.get(ACUTAL_KEY);

        int cmp = _COLUMN_COMPARATOR.compareKey(this.getKey(), otherKey, this.getSorter(), this.conf);
        if( cmp!=0 )
        {
            return cmp;
        }

        return getDatasetID().compareTo(other.getByte(DATASET_ID));
    }

    /**
     * Compares two tuples; when both are {@link DataJoinKey}s the result is
     * <code>t1.compareTo(t2)</code>, otherwise the comparison is delegated to
     * the parent class.
     */
    @Override
    public int compare(Tuple t1, Tuple t2)
    {
        if( t1 instanceof DataJoinKey && t2 instanceof DataJoinKey )
        {
            return t1.compareTo(t2);
        }
        return super.compare(t1, t2);
    }

    /**
     * Comparator that looks at the join key only: two {@link DataJoinKey}s
     * with the same join key compare as equal regardless of their dataset ID.
     */
    public static class Comparator extends WritableComparator
    {
        public Comparator()
        {
            super(DataJoinKey.class);
        }

        /**
         * Compares the join keys when both arguments are {@link DataJoinKey}s,
         * otherwise delegates to {@link WritableComparator}.
         */
        @Override
        public int compare(WritableComparable a, WritableComparable b)
        {
            if( a instanceof DataJoinKey && b instanceof DataJoinKey )
            {
                WritableComparable left = ((DataJoinKey)a).getKey();
                WritableComparable right = ((DataJoinKey)b).getKey();
                return left.compareTo(right);
            }
            return super.compare(a, b);
        }
    }

    /**
     * Reads one serialized join key value whose type byte has already been
     * consumed from <code>input</code>.
     *
     * @param type the type byte that preceded the value
     * @param input the buffer positioned right after the type byte
     * @return {@link NullWritable} for {@link Tuple#NULL_WRITABLE_TYPE}, a
     *         freshly read {@link Tuple} for {@link Tuple#TUPLE_TYPE},
     *         otherwise an instance of the class whose name is read from the
     *         buffer, populated from the buffer
     * @throws IOException if the value cannot be read
     */
    private WritableComparable getKey(byte type, DataInputBuffer input)
        throws IOException
    {
        if( type==Tuple.NULL_WRITABLE_TYPE )
        {
            return NullWritable.get();
        }

        if( type==Tuple.TUPLE_TYPE )
        {
            Tuple newTuple = new Tuple();
            newTuple.readFields(input);
            return newTuple;
        }

        // any other writable is stored as its class name followed by its payload
        WritableComparable w = (WritableComparable)ReflectionUtils.newInstance( Util.getClass(input.readUTF()), conf);
        w.readFields(input);
        return w;
    }

    /**
     * Compares two serialized {@link DataJoinKey}s.
     *
     * <p>
     * The comparing ordering:
     * <ol>
     * <li>the join key, see {@link #ACUTAL_KEY}</li>
     * <li>the dataset ID, see {@link #DATASET_ID}</li>
     * </ol>
     * The former sort keyword and sort comparator columns have been removed.
     *
     * @param b1 buffer holding the first serialized key
     * @param s1 offset of the first key in <code>b1</code>
     * @param l1 length of the first key
     * @param b2 buffer holding the second serialized key
     * @param s2 offset of the second key in <code>b2</code>
     * @param l2 length of the second key
     * @return negative, zero or positive as the first key sorts before, equal
     *         to or after the second
     * @throws IllegalArgumentException if the two join keys are not both
     *         <code>NullWritable</code> or both {@link Tuple}
     * @throws RuntimeException wrapping any {@link IOException} raised while
     *         reading the buffers
     */
    @Override
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2)
    {
        DataInputBuffer d1 = new DataInputBuffer();
        d1.reset(b1, s1, l1);

        DataInputBuffer d2 = new DataInputBuffer();
        d2.reset(b2, s2, l2);

        try
        {
            // read number of columns from the two tuple,
            // but there is no need to compare the length
            // of columns, we just read the values.
            d1.readInt();
            d2.readInt();

            byte type1 = d1.readByte();
            byte type2 = d2.readByte();

            // if the keys are not the same, these two records should go to
            // different reducer, or different reduce iteration.
            int keyResult = compareSerializedKeys(type1, d1, type2, d2);
            if( keyResult!=0 )
            {
                return keyResult;
            }

            try
            {
                return compareSerializedDatasetIds(d1, d2);
            }
            catch(IOException e)
            {
                // dump the first serialized key and the key types to help
                // diagnosing corrupted input, then let the outer handler wrap it.
                System.err.println(Arrays.toString(Arrays.copyOfRange(b1, s1, s1+l1)));
                System.err.println("type1:"+type1+", type2:"+type2);
                throw e;
            }
        }
        catch(IOException e)
        {
            throw new RuntimeException(e);
        }
    }

    /**
     * Compares the join key part of two serialized keys. The join key is the
     * actual key the user specified; only <code>NullWritable</code> and
     * {@link Tuple} are supported, and both sides must be of the same kind.
     *
     * @param type1 type byte of the first join key, already consumed from <code>d1</code>
     * @param d1 first buffer, positioned right after the type byte
     * @param type2 type byte of the second join key, already consumed from <code>d2</code>
     * @param d2 second buffer, positioned right after the type byte
     * @return the comparison result of the two join keys
     * @throws IOException if a key cannot be read
     * @throws IllegalArgumentException for any unsupported type combination
     */
    private int compareSerializedKeys(byte type1, DataInputBuffer d1, byte type2, DataInputBuffer d2)
        throws IOException
    {
        _COLUMN_COMPARATOR.setType(type1, type2);

        if( type1==Tuple.NULL_WRITABLE_TYPE && type2==Tuple.NULL_WRITABLE_TYPE )
        {
            // both are NullWritable, consider equal
            return 0;
        }

        if( type1==Tuple.TUPLE_TYPE && type2==Tuple.TUPLE_TYPE )
        {
            // both are Tuple
            Tuple k1 = (Tuple)getKey(type1, d1);
            Tuple k2 = (Tuple)getKey(type2, d2);
            return _COLUMN_COMPARATOR.compareKey(k1, k2, this.getSorter(), conf);
        }

        // DataJoinKey only support NullWritable and Tuple for the join key column
        throw new IllegalArgumentException("Cannot compare "+Tuple.getTypeString(type1)+" and "+Tuple.getTypeString(type2));
    }

    /**
     * Compares the dataset ID part of two serialized keys. At this point the
     * join keys are the same, so the records go to the same reducer, and the
     * values from a lower dataset ID must come before those of a higher one.
     *
     * @param d1 first buffer, positioned at the type byte of the dataset ID
     * @param d2 second buffer, positioned at the type byte of the dataset ID
     * @return the comparison result of the two dataset IDs
     * @throws IOException if a dataset ID cannot be read
     */
    private int compareSerializedDatasetIds(DataInputBuffer d1, DataInputBuffer d2)
        throws IOException
    {
        _COLUMN_COMPARATOR.setType(d1.readByte(), d2.readByte());
        return _COLUMN_COMPARATOR.compare(d1, d2, this.conf);
    }
}