package com.ebay.erl.mobius.core.model;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.Serializable;
import java.sql.Time;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import com.ebay.erl.mobius.core.ConfigureConstants;
import com.ebay.erl.mobius.core.collection.CaseInsensitiveTreeMap;
import com.ebay.erl.mobius.util.Util;
/**
* Represents a record(row) in a dataset.
* <p>
*
* <p>
* This product is licensed under the Apache License, Version 2.0,
* available at http://www.apache.org/licenses/LICENSE-2.0.
*
* This product contains portions derived from Apache hadoop which is
* licensed under the Apache License, Version 2.0, available at
* http://hadoop.apache.org.
*
* © 2007 – 2012 eBay Inc., Evan Chiu, Woody Zhou, Neel Sundaresan
*
*/
public class Tuple
implements WritableComparable<Tuple>, Cloneable, Configurable, RawComparator<Tuple>
{
/**
* Ad-hoc debugging entry point: writes the single integer <code>5</code>,
* in {@link DataOutputStream} binary form, to <code>s:/test.binary</code>.
*
* @param arg command line arguments, ignored.
* @throws Throwable if the file cannot be opened or written.
*/
public static void main(String[] arg)
throws Throwable
{
    DataOutputStream dos = new DataOutputStream(new FileOutputStream(new File("s:/test.binary")));
    try
    {
        dos.writeInt(5);
        dos.flush();
    }
    finally
    {
        // release the file handle even when the write above fails.
        dos.close();
    }
}
/**
* Running estimate of the memory used by this tuple, in bytes. It
* starts at 8 for the object header and is increased by the
* <code>put</code> methods every time a value is stored.
*/
protected long estimate_size_in_bytes = 8/* object header*/;
/**
* Type code for {@link Byte} values.
*/
public static final byte BYTE_TYPE = 0;
/**
* Type code for {@link Short} values.
*/
public static final byte SHORT_TYPE = 1;
/**
* Type code for {@link Integer} values.
*/
public static final byte INTEGER_TYPE = 2;
/**
* Type code for {@link Long} values.
*/
public static final byte LONG_TYPE = 3;
/**
* Type code for {@link Float} values.
*/
public static final byte FLOAT_TYPE = 4;
/**
* Type code for {@link Double} values.
*/
public static final byte DOUBLE_TYPE = 5;
/**
* Type code for {@link String} values.
*/
public static final byte STRING_TYPE = 6;
/**
* Type code for java.sql.Date, which only contains yyyy-mm-dd.
*/
public static final byte DATE_TYPE = 7;
/**
* Type code for java.sql.Timestamp, which contains
* yyyy-mm-dd hh:mm:ss.[fff]
*/
public static final byte TIMESTAMP_TYPE = 8;
/**
* Type code for java.sql.Time, which contains hh:mm:ss.[fff]
*/
public static final byte TIME_TYPE = 9;
/**
* Type code for {@link Boolean} values.
*/
public static final byte BOOLEAN_TYPE = 10;
/**
* Type code for an array of elements; the element types have
* to be ones supported by Tuple.
*/
public static final byte ARRAY_TYPE = 11;
/**
* Type code representing a value generated by a combiner.
*/
public static final byte RESULT_WRAPPER_TYPE = 120;
/**
* Type code for org.apache.hadoop.io.NullWritable.
*/
public static final byte NULL_WRITABLE_TYPE = 121;
/**
* Type code for the com.ebay.erl.mobius.core.model.Tuple type.
*/
public static final byte TUPLE_TYPE = 122;
/**
* Type code for byte[].
*/
public static final byte BYTE_ARRAY_TYPE = 123;
/**
* Type code for the {@link CaseInsensitiveTreeMap} type.
*/
public static final byte STRING_MAP_TYPE = 124;
/**
* Type code for the {@link org.apache.hadoop.io.Writable}
* type.
*/
public static final byte WRITABLE_TYPE = 125;
/**
* Type code for {@link java.io.Serializable}.
*/
public static final byte SERIALIZABLE_TYPE = 126;
/**
* Type code for java <code>null</code>.
*/
public static final byte NULL_TYPE = 127;
/**
* Logger shared by all tuples.
*/
private static final Log LOGGER = LogFactory.getLog(Tuple.class);
/**
* A mapping from a (lower-cased) column name to the index in the
* {@link #values} list where the value of that column is stored.
*/
protected Map<String, Integer> namesToIdxMapping = new HashMap<String, Integer>();
/**
* Holds the actual values of the columns within this tuple.
*/
protected List<Object> values = new ArrayList<Object>(1);
/**
* Monitor used for synchronizing update activities to
* this tuple.
* NOTE(review): the monitor is a string literal, so presumably every
* tuple in the JVM shares the same lock; confirm whether per-instance
* locking was intended.
*/
protected final String _INSERT_KEY = "insert";
/**
* The delimiter used to separate the column values in
* {@link #toString()}. It is initialised to a tab;
* {@link #setConf(Configuration)} is where a user supplied
* {@link ConfigureConstants#TUPLE_TO_STRING_DELIMITER} is meant to
* override it.
*/
private static String _DELIMITER = "\t";
/**
* Hadoop configuration object.
*/
protected Configuration conf;
/**
* Whether this tuple may be modified; when <code>false</code>,
* {@link #insert(String, Object)} throws
* {@link UnsupportedOperationException}.
*/
protected boolean isMutable = true;
/**
* For quick lookup of the lower-case form of a given string, so
* we don't have to call toLowerCase() every time a value is
* retrieved from or set on a tuple.
*/
protected static Map<String/* any string*/, String/*lower case of the key*/> lowerCases =
Collections.synchronizedMap(new HashMap<String, String>());
/**
* Cache from a set of column names to the list of those names,
* lower-cased and sorted; filled by {@link #getSorted(Set)}.
*/
protected static Map<Set<String>/*not sorted set*/, List<String>/*sorted keys*/> sortedKeys =
Collections.synchronizedMap(new HashMap<Set<String>, List<String>>());
/**
* An immutable {@link Tuple} which contains only a single column,
* named NULL, holding a null value.
*/
public static final Tuple NULL;
static
{
// build the single-column tuple first, then wrap it as immutable.
Tuple nullTuple = new Tuple();
nullTuple.putNull("NULL");
NULL = Tuple.immutable(nullTuple);
}
/**
* NOTE(review): presumably the column order used by
* {@link #toString()}; its use is not visible in this part of the
* file, confirm.
*/
protected String[] toStringOrdering;
/**
* Returns the lower-case form of <code>key</code>, remembering the
* answer in {@link #lowerCases} so the conversion is done only once
* per distinct string.
* NOTE(review): the conversion uses the JVM default locale; confirm
* that column names never need locale-independent casing.
*
* @param key the string to convert, must not be <code>null</code>.
* @return the lower-case form of <code>key</code>.
*/
protected synchronized static String lowerCase(String key)
{
    String cached = lowerCases.get(key);
    if( cached!=null )
    {
        // seen before, reuse the stored conversion.
        return cached;
    }
    String lowered = key.toLowerCase();
    lowerCases.put(key, lowered);
    return lowered;
}
/**
* Returns the names in <code>aKeySet</code>, lower-cased and sorted in
* natural string order. Results are cached in {@link #sortedKeys}.
* <p>
*
* The cache is keyed by a private copy of <code>aKeySet</code>.
* Callers pass the live <code>keySet()</code> view of a tuple's schema
* map; using that view as the key would let its hash code change
* after insertion and would keep the tuple's map reachable from this
* static cache.
*
* @param aKeySet the column names to sort, must not be <code>null</code>.
* @return the cached, sorted list of lower-cased names. The list is
* shared between callers and must not be modified.
*/
protected synchronized static List<String> getSorted(Set<String> aKeySet)
{
    List<String> sorted = sortedKeys.get(aKeySet);
    if( sorted==null )
    {
        sorted = new ArrayList<String>(aKeySet.size());
        for( String aKey:aKeySet )
        {
            sorted.add(lowerCase(aKey));
        }
        Collections.sort(sorted);
        // key the cache by a snapshot, never by the caller's mutable set.
        sortedKeys.put(new java.util.HashSet<String>(aKeySet), sorted);
    }
    return sorted;
}
/**
* Sets the schema of this tuple.
* <p>
*
* This method is called when a tuple is deserialized
* from disk, and should be called only in that case.
* <p>
*
* The names in <code>schema</code> are lower-cased and sorted
* before they are assigned to column indexes. This is because
* {@link #write(DataOutput)} emits the values in the order of
* their lower-cased, sorted column names, so the same order has
* to be used to map the names back onto the values that were
* read.
* <p>
*
* The reason for doing this is that Mobius stores the schema in
* the Hadoop configuration, but the user might create tuples with
* the same schema in a different order (inserting the values in
* an order different from the defined schema). Serializing the
* values by name order makes the tuple deserializable against the
* right schema regardless of insertion order.
*
* @param schema the column names of this tuple, must not be
* <code>null</code>. As before, the array is sorted in place.
*/
public void setSchema(String[] schema)
{
    // kept for backward compatibility: callers have always seen
    // their array sorted in place by this method.
    Arrays.sort(schema);

    // the index of a column must follow the order write() uses, which
    // is the order of the LOWER-CASED names; sorting the original-case
    // names gives a different order for mixed-case schemas.
    String[] normalized = new String[schema.length];
    for( int i=0;i<schema.length;i++ )
    {
        normalized[i] = lowerCase(schema[i]);
    }
    Arrays.sort(normalized);

    this.namesToIdxMapping.clear();
    int idx = 0;
    for( String aName:normalized )
    {
        this.namesToIdxMapping.put(aName, idx++);
    }
}
/**
* Check if the given <code>name</code> exists
* in this tuple or not, if so, returns its index.
*
* @return if the given <code>name</code> is in the
* schema of this tuple, the index of the schema
* is returned. Otherwise, {@link IllegalArgumentException}
* is thrown.
*/
protected int check_in_schema(String name)
{
    // column names are stored lower-cased, so look up the lower-case form.
    Integer position = this.namesToIdxMapping.get(lowerCase(name));
    if( position==null )
    {
        throw new IllegalArgumentException("["+name+"] doesn't exist in this tuple's schema:"+this.namesToIdxMapping.keySet()+", index:"+this.namesToIdxMapping.values());
    }
    return position;
}
/**
* Get the value of the given column <code>name</code> in
* the <code>expecting_type</code>.
* <p>
*
* If <code>value</code> is already of the
* <code>expecting_type</code> it is returned as is. Otherwise,
* if <code>value</code> is <code>null</code>,
* <code>default_value</code> is returned, whatever the expecting
* type is. Otherwise Mobius tries to convert the value:
* number to number, anything to string (using
* <code>toString()</code>), date type to date type, and string
* to number, date type or boolean.
*
* @param expecting_type user specified type for the returned value,
* one of the type constants of this class.
* @param name the name of a column within this tuple, only used
* in log and error messages.
* @param value the original value of the column <code>name</code>
* @param default_value returned when the original value is
* <code>null</code>.
* @return the value in the <code>expecting_type</code>, or
* <code>default_value</code>.
* @throws NumberFormatException if a string value cannot be parsed
* into the expecting numerical type.
* @throws ClassCastException if no conversion exists from the actual
* type to the expecting type.
*/
protected Object get(byte expecting_type, String name, Object value, Object default_value)
{
    byte actual_type = Tuple.getType(value);
    if( expecting_type==actual_type )
    {
        return value;
    }

    // expecting type and actual type are different.
    if( value==null )
    {
        // a single guard, so every conversion below honours the
        // documented contract and can rely on a non-null value.
        return default_value;
    }

    if ( Tuple.isNumericalType(expecting_type) && Tuple.isNumericalType(actual_type) )
    {
        // both numerical, but not exactly the same, perform transformation.
        Number number = (Number)value;
        switch(expecting_type)
        {
            case BYTE_TYPE:
                return number.byteValue();
            case SHORT_TYPE:
                return number.shortValue();
            case INTEGER_TYPE:
                return number.intValue();
            case LONG_TYPE:
                return number.longValue();
            case FLOAT_TYPE:
                return number.floatValue();
            case DOUBLE_TYPE:
                return number.doubleValue();
            default:
                throw new IllegalArgumentException(String.format("%02X", expecting_type)+" is not numerical type.");
        }
    }
    else if( expecting_type==STRING_TYPE )
    {
        // expecting type is string, but the actual type is not string,
        // convert it to string by calling toString().
        if( LOGGER.isTraceEnabled() )
        {
            // building the message is not free, skip it unless it is logged.
            LOGGER.trace("Accessing column["+name+"], the expecting type is ["+Tuple.getTypeString(expecting_type)+"], " +
                    "but actual type is ["+Tuple.getTypeString(actual_type)+"], using toString() to get the value.");
        }
        return value.toString();
    }
    else if( Tuple.isDateType(expecting_type) && Tuple.isDateType(actual_type) )
    {
        // date type, but the expecting type is not the same as the actual type.
        // Ex:, expecting java.sql.Date, but actual is java.sql.Timestamp.
        // java.util.Date is the common super type of java.sql.Date,
        // java.sql.Time and java.sql.Timestamp.
        long millis = ((java.util.Date)value).getTime();
        switch(expecting_type)
        {
            case Tuple.DATE_TYPE:
                return new java.sql.Date(millis);
            case Tuple.TIME_TYPE:
                return new java.sql.Time(millis);
            case Tuple.TIMESTAMP_TYPE:
                return new java.sql.Timestamp(millis);
            default:
                throw new IllegalArgumentException(Tuple.getTypeString(expecting_type)+" is not a date type.");
        }
    }
    else if( Tuple.isDateType(expecting_type) && actual_type==STRING_TYPE )
    {
        // expecting type is date type, but the actual type is string
        String date_str = (String)value;
        switch(expecting_type)
        {
            case Tuple.DATE_TYPE:
                return java.sql.Date.valueOf(date_str);
            case Tuple.TIME_TYPE:
                return java.sql.Time.valueOf(date_str);
            case Tuple.TIMESTAMP_TYPE:
                return java.sql.Timestamp.valueOf(date_str);
            default:
                throw new IllegalArgumentException(Tuple.getTypeString(expecting_type)+" is not a date type.");
        }
    }
    else if( Tuple.isNumericalType(expecting_type) && actual_type==STRING_TYPE )
    {
        // expecting type is numerical, but the actual type is string,
        // try to convert it into numerical value
        String value_str = (String)value;
        try
        {
            switch(expecting_type)
            {
                case BYTE_TYPE:
                    return Byte.parseByte(value_str);
                case SHORT_TYPE:
                    return Short.parseShort(value_str);
                case INTEGER_TYPE:
                    return Integer.parseInt(value_str);
                case LONG_TYPE:
                    return Long.parseLong(value_str);
                case FLOAT_TYPE:
                    return Float.parseFloat(value_str);
                case DOUBLE_TYPE:
                    return Double.parseDouble(value_str);
                default:
                    throw new IllegalArgumentException(String.format("%02X", expecting_type)+" is not numerical type.");
            }
        }
        catch(NumberFormatException e)
        {
            // rethrow with the column name in the message, keeping the
            // original failure as the cause.
            NumberFormatException converted = new NumberFormatException("The value of column["+name+"] is ["+value_str+"] and cannot be converted into "+Tuple.getTypeString(expecting_type));
            converted.initCause(e);
            throw converted;
        }
    }
    else if( expecting_type==BOOLEAN_TYPE && actual_type==STRING_TYPE )
    {
        return Boolean.valueOf((String)value);
    }
    throw new ClassCastException("Column ["+name+"] is " +
            Tuple.getTypeString(actual_type) + ", cannot be converted into "+Tuple.getTypeString(expecting_type));
}
/**
* Get the value for column <code>name</code> in the
* <code>expecting_type</code>.
*
* @param expecting_type the type of the returned object.
* @param name name of a column in this tuple.
* @param default_value if the value of the column is null, then
* <code>default_value</code> is returned.
* @return the value in the <code>expected_type</code> of column
* <code>name</code>
*/
protected Object get(byte expecting_type, String name, Object default_value)
{
    // fetch the raw value by name and let the four-argument overload
    // do the type conversion.
    return this.get(expecting_type, name, this.get(name), default_value);
}
/**
* Get the value of <code>idx</code><sub>th</sub>
* column in this tuple.
* <p>
*
* If the value of that column is not double, Mobius will
* try to convert it to double if possible, otherwise,
* {@link NumberFormatException} will be thrown.
*
* @param idx index to a column in this tuple, starts from 0.
* @param default_value if the value of the column is null,
* then <code>default_value</code> is returned.
* @return
*/
public Double getDouble(int idx, double default_value)
{
    Object raw = this.get(idx);
    // there is no column name here, so the index is used as the label
    // in error messages.
    String label = "@index:"+idx;
    Object converted = this.get(Tuple.DOUBLE_TYPE, label, raw, default_value);
    return (Double)converted;
}
/**
* Get the readable string representation for a given
* type.
*
* @param type type supported by Tuple.
* @return a readable string representation of the
* <code>type</code>.
*/
public static String getTypeString(byte type)
{
    // one callback per supported type code; each returns the readable
    // name for that type.
    TupleTypeHandler<String> describer = new TupleTypeHandler<String>(){

        // ---- numerical types ----
        @Override
        protected String on_byte() throws IOException {
            return Byte.class.getCanonicalName();
        }
        @Override
        protected String on_short() throws IOException {
            return Short.class.getCanonicalName();
        }
        @Override
        protected String on_integer() throws IOException {
            return Integer.class.getCanonicalName();
        }
        @Override
        protected String on_long() throws IOException {
            return Long.class.getCanonicalName();
        }
        @Override
        protected String on_float() throws IOException {
            return Float.class.getCanonicalName();
        }
        @Override
        protected String on_double() throws IOException {
            return Double.class.getCanonicalName();
        }

        // ---- string and boolean ----
        @Override
        protected String on_string() throws IOException {
            return String.class.getCanonicalName();
        }
        @Override
        protected String on_boolean() throws IOException {
            return Boolean.class.getCanonicalName();
        }

        // ---- date types ----
        @Override
        protected String on_date() throws IOException {
            return java.sql.Date.class.getCanonicalName();
        }
        @Override
        protected String on_time() throws IOException {
            return Time.class.getCanonicalName();
        }
        @Override
        protected String on_timestamp() throws IOException {
            return Timestamp.class.getCanonicalName();
        }

        // ---- containers ----
        @Override
        protected String on_array() throws IOException {
            return Array.class.getCanonicalName();
        }
        @Override
        protected String on_byte_array() throws IOException {
            return "byte[]";
        }
        @Override
        protected String on_string_map() throws IOException {
            return Map.class.getCanonicalName()+"<String, String>";
        }
        @Override
        protected String on_tuple() throws IOException {
            return Tuple.class.getCanonicalName();
        }
        @Override
        protected String on_result_wrapper() throws IOException {
            return ResultWrapper.class.getCanonicalName();
        }

        // ---- generic serialization types ----
        @Override
        protected String on_writable() throws IOException {
            return WritableComparable.class.getCanonicalName();
        }
        @Override
        protected String on_serializable() throws IOException {
            return Serializable.class.getCanonicalName();
        }

        // ---- null flavours ----
        @Override
        protected String on_null() throws IOException {
            return "Null";
        }
        @Override
        protected String on_null_writable() throws IOException {
            return NullWritable.class.getCanonicalName();
        }

        // ---- anything else is not a Tuple type ----
        @Override
        protected String on_default() throws IOException {
            throw new IllegalArgumentException("Unsupported type ["+String.format("0x%02X", type)+"]");
        }
    };
    try
    {
        return describer.handle(type);
    }
    catch (IOException e)
    {
        // the handler methods declare IOException; surface it unchecked.
        throw new RuntimeException(e);
    }
}
/**
* Get the type of the given <code>obj</code>
* in <code>byte</code> format, one of the
* type constants of this class.
* <p>
*
* The checks run from the most specific type to the most general
* one, because several supported types also implement
* {@link Writable} or {@link Serializable}.
*
* @param obj the object to classify, may be <code>null</code>.
* @return the type code of <code>obj</code>; {@link #NULL_TYPE}
* when <code>obj</code> is <code>null</code>.
* @throws IllegalArgumentException if <code>obj</code> is of a type
* not supported by Tuple.
*/
@SuppressWarnings("unchecked")
public static byte getType(Object obj)
{
    if (obj ==null )
    {
        return NULL_TYPE;
    }
    else if (obj instanceof Byte)
    {
        return BYTE_TYPE;
    }
    else if( obj instanceof Short)
    {
        return SHORT_TYPE;
    }
    else if( obj instanceof Integer )
    {
        return INTEGER_TYPE;
    }
    else if (obj instanceof Long)
    {
        return LONG_TYPE;
    }
    else if (obj instanceof Float)
    {
        return FLOAT_TYPE;
    }
    else if (obj instanceof Double)
    {
        return DOUBLE_TYPE;
    }
    else if (obj instanceof String)
    {
        return STRING_TYPE;
    }
    else if (obj instanceof java.sql.Date)
    {
        return DATE_TYPE;
    }
    else if (obj instanceof Timestamp)
    {
        return TIMESTAMP_TYPE;
    }
    else if (obj instanceof Time)
    {
        return TIME_TYPE;
    }
    else if (obj instanceof Boolean)
    {
        return BOOLEAN_TYPE;
    }
    else if ( obj instanceof byte[])
    {
        // must be tested before Serializable: every Java array is
        // Serializable, so testing it afterwards made this branch
        // unreachable and byte[] was reported as SERIALIZABLE_TYPE.
        return BYTE_ARRAY_TYPE;
    }
    else if (obj instanceof Map)
    {
        return STRING_MAP_TYPE;
    }
    else if (obj instanceof Array)
    {
        return ARRAY_TYPE;
    }
    else if (obj instanceof NullWritable)
    {
        return NULL_WRITABLE_TYPE;
    }
    else if (obj instanceof Tuple )
    {
        return TUPLE_TYPE;
    }
    else if (obj instanceof ResultWrapper)
    {
        return RESULT_WRAPPER_TYPE;
    }
    else if (obj instanceof Writable)
    {
        return WRITABLE_TYPE;
    }
    else if (obj instanceof Serializable)
    {
        return SERIALIZABLE_TYPE;
    }
    else
    {
        throw new IllegalArgumentException(obj.getClass().getName()+" is not supported in Tuple.");
    }
}
/**
* Deserialize the tuple from the input
* <code>in</code>.
*/
@Override
public void readFields(DataInput in)
throws IOException
{
    // start from an empty value list, reusing the existing one if any.
    if( this.values!=null )
    {
        this.values.clear();
    }
    else
    {
        this.values = new ArrayList<Object>();
    }

    // layout: column count, then for every column a type byte
    // followed by the value, which the reader appends to the list.
    int remaining = in.readInt();
    ReadFieldImpl reader = new ReadFieldImpl(this.values, in, this.conf);
    while( remaining-- > 0 )
    {
        byte column_type = in.readByte();
        reader.handle(column_type);
    }
}
/**
* Serialize this tuple to the output <code>out</code>.
* <p>
*
* The layout is the number of columns, followed, for every
* column, by a type byte and the value. The values are stored in
* the order of the sorted schema names. See
* {@link #setSchema(String[])} for more explanation.
* <p>
*
* Nothing is written when the number of values and the number of
* schema names differ.
*
* @param out the output to write to.
* @throws IOException if writing to <code>out</code> fails.
* @throws IllegalArgumentException if the number of values differs
* from the number of schema names.
*/
@Override
public void write(DataOutput out)
throws IOException
{
    // validate before emitting anything, so a failure does not leave
    // a dangling column count in the output.
    if( this.values.size()!=this.namesToIdxMapping.size() )
    {
        StringBuilder sb = new StringBuilder();
        for( Object v:values)
        {
            // append(Object) copes with null column values, while
            // v.toString() threw NullPointerException and hid the
            // real error below.
            sb.append(v).append(",");
        }
        throw new IllegalArgumentException(this.getClass().getCanonicalName()+", the length of values and schmea is not the same, " +
                "very likely the schema of this tuple has not been set yet, please set it using Tuple#setSchema(String[])." +
                " Values:["+sb.toString()+"] schema:"+this.namesToIdxMapping.keySet());
    }

    // write the size of the column of this tuple
    out.writeInt(this.values.size());

    WriteImpl writeImpl = new WriteImpl(out);
    for ( String aColumnName : getSorted(this.namesToIdxMapping.keySet()) )
    {
        Object value = this.values.get(this.namesToIdxMapping.get(aColumnName));
        byte type = getType(value);
        out.write(type);
        writeImpl.setValue(value);
        writeImpl.handle(type);
    }
}
/**
* Compare this tuple with <code>other</code>.
* <p>
*
* It delegates to {@link #compare(Tuple, Tuple)}.
*/
@Override
public int compareTo(Tuple other)
{
    // ordering is defined in one place, the two-tuple comparison.
    final int ordering = this.compare(this, other);
    return ordering;
}
/**
* Adds a new column named <code>name</code> holding
* <code>value</code>, or replaces the value if the column already
* exists.
* <p>
*
* <code>name</code> is lower-cased and parsed by
* {@link TupleColumnName}. When the parsed name carries a map key,
* the column has to exist already and hold a
* {@link CaseInsensitiveTreeMap}; <code>value.toString()</code> is
* then stored under that key in the map instead of replacing the
* column.
* <p>
*
* The column name is registered in the schema only after the
* value has been stored, so a rejected call leaves this tuple
* unchanged.
*
* @param name name of the column, optionally in map style.
* @param value the value to store.
* @return this tuple.
* @throws UnsupportedOperationException if this tuple is immutable.
* @throws IllegalArgumentException if a map style <code>name</code>
* is used on a column that doesn't exist yet or doesn't hold a
* {@link CaseInsensitiveTreeMap}.
* @throws IllegalStateException if the schema refers to a position
* beyond the next free slot of the value list.
*/
public Tuple insert(String name, Object value)
{
    if( !this.isMutable )
    {
        throw new UnsupportedOperationException("This tuple is immutable, cannot be modified.");
    }
    TupleColumnName tcn = TupleColumnName.valueOf(lowerCase(name));
    String id = tcn.getID();
    String mapKey = tcn.getMapKey();
    synchronized(this._INSERT_KEY)
    {
        // a column that is not in the schema yet goes to the next
        // free schema position; it is NOT registered until the value
        // has been accepted, see the end of this block.
        Integer known_idx = this.namesToIdxMapping.get(id);
        int value_idx = known_idx!=null ? known_idx.intValue() : this.namesToIdxMapping.size();

        if( value_idx<this.values.size() )
        {
            // replace mode, replace the old value
            if( mapKey==null )
            {
                // the <code>name</code> is not map ID style
                this.values.set(value_idx, value);
            }
            else
            {
                Object current = this.values.get(value_idx);
                if( !(current instanceof CaseInsensitiveTreeMap) )
                {
                    throw new IllegalArgumentException("Column ["+id+"] is not "+CaseInsensitiveTreeMap.class.getCanonicalName()+", " +
                            "cannot change the value using map style ID ["+name+"]");
                }
                ((CaseInsensitiveTreeMap)current).put(mapKey, value.toString());
            }
        }
        else if (value_idx == this.values.size())
        {
            // insert mode
            if( mapKey!=null )
            {
                // user tries to use a Map style ID to add new value, disallow
                throw new IllegalArgumentException("Column ["+id+"] has not been initialized as Map, " +
                        "cannot use ["+name+"] to change the value of the key directly.");
            }
            // the <code>name</code> is not map ID style
            this.values.add(value_idx, value);
        }
        else
        {
            throw new IllegalStateException();
        }

        if( known_idx==null )
        {
            // the value is stored, now the name can safely be added to
            // the schema. Registering it up front left a name without
            // a value behind whenever the call was rejected, which made
            // every later insert of a new column fail.
            this.namesToIdxMapping.put(id, value_idx);
        }
    }
    return this;
}
/**
* Get Hadoop configuration.
*/
@Override
public Configuration getConf()
{
    // plain accessor, returns whatever setConf stored (possibly null).
    final Configuration current = this.conf;
    return current;
}
/**
* Set the Hadoop configuration. The delimiter used to separate
* the column values in {@link #toString()} is also set here.
* <p>
*
* The default delimiter is tab, unless
* {@link ConfigureConstants#TUPLE_TO_STRING_DELIMITER}
* is set to a non-empty value in <code>conf</code>. The delimiter
* is shared by all tuples.
*
* @param conf the Hadoop configuration; when <code>null</code>,
* only the reference is stored and the delimiter is left untouched.
*/
@Override
public void setConf(Configuration conf)
{
    this.conf = conf;
    if( conf==null )
    {
        return;
    }

    // the previous code only read the configuration when the current
    // delimiter was empty; the delimiter starts as a tab, so the
    // user's setting was never applied.
    String configured = conf.get(ConfigureConstants.TUPLE_TO_STRING_DELIMITER);
    if( configured!=null && !configured.isEmpty() )
    {
        // lock on the class, not on the delimiter string itself: that
        // field is reassigned here, so it cannot serve as a stable monitor.
        synchronized(Tuple.class)
        {
            Tuple._DELIMITER = configured;
        }
    }
}
/**
* Column-level comparator used by both <code>compare</code>
* overloads of this class; for every column they first call
* <code>setType</code> with the two column types and then
* <code>compare</code>.
* NOTE(review): the comparator appears to keep the two types as state
* between those calls; confirm before comparing with the same Tuple
* instance from several threads.
*/
private final TupleColumnComparator _COLUMN_COMPARATOR = new TupleColumnComparator();
/**
* compare two tuples in low level row format.
*/
@Override
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2)
{
    DataInputBuffer left = new DataInputBuffer();
    left.reset(b1, s1, l1);
    DataInputBuffer right = new DataInputBuffer();
    right.reset(b2, s2, l2);

    int outcome;
    try
    {
        // each serialized tuple starts with its number of columns.
        final int left_columns = left.readInt();
        final int right_columns = right.readInt();
        final int shared_columns = Math.min(left_columns, right_columns);

        // compare column by column, as far as both tuples have columns.
        int column = 0;
        while( column<shared_columns )
        {
            byte left_type = left.readByte();
            byte right_type = right.readByte();
            _COLUMN_COMPARATOR.setType(left_type, right_type);
            int column_outcome = _COLUMN_COMPARATOR.compare(left, right, this.conf);

            // 0 and Integer.MAX_VALUE both mean "no decision from this
            // column"; anything else is the answer.
            boolean undecided = column_outcome==0 || column_outcome==Integer.MAX_VALUE;
            if( !undecided )
            {
                return column_outcome;
            }
            column++;
        }

        // all shared columns are the same, the tuple with fewer
        // columns comes first.
        outcome = left_columns - right_columns;
    }
    catch(IOException e)
    {
        throw new RuntimeException(e);
    }

    if( outcome==Integer.MAX_VALUE )
    {
        throw new IllegalArgumentException();
    }
    return outcome;
}
/**
* Comparing two tuples.
* <p>
*
* It compares the values of the two tuples one
* by one in sequence, and as long as there is a
* difference between two values, then the
* difference is returned.
* <p>
*
* If the number of values in the tuples are
* different, the values are compared up to
* the boundary of the smaller size tuple. If
* all the values before the boundary have no
* differences, then the smaller size tuple
* is considered to be placed before the bigger
* size tuple.
*/
@Override
public int compare(Tuple t1, Tuple t2)
{
    final int left_size = t1.values.size();
    final int right_size = t2.values.size();
    final int shared_size = Math.min(left_size, right_size);

    int outcome;
    try
    {
        // compare value by value, as far as both tuples have values.
        int position = 0;
        while( position<shared_size )
        {
            Object left_value = t1.values.get(position);
            Object right_value = t2.values.get(position);
            _COLUMN_COMPARATOR.setType(Tuple.getType(left_value), Tuple.getType(right_value));
            int value_outcome = _COLUMN_COMPARATOR.compare(left_value, right_value, null);

            // 0 and Integer.MAX_VALUE both mean "no decision from this
            // value"; anything else is the answer.
            boolean undecided = value_outcome==0 || value_outcome==Integer.MAX_VALUE;
            if( !undecided )
            {
                return value_outcome;
            }
            position++;
        }

        // all shared values are the same, the tuple with fewer values
        // comes first (0 when the sizes are equal as well).
        outcome = left_size - right_size;
    }
    catch(IOException e)
    {
        throw new RuntimeException(e);
    }

    if( outcome==Integer.MAX_VALUE )
    {
        throw new IllegalArgumentException();
    }
    return outcome;
}
/**
* For the given column named <code>name</code>,
* add (if it doesn't exist) to this tuple or update (if it
* exists) its value to the given result wrapper <code>v</code>.
*
* @return return this tuple
* @throws NullPointerException if <code>v</code> is <code>null</code>.
*/
public Tuple put(String name, ResultWrapper<?> v)
{
    if( v!=null )
    {
        this.insert(name, v);
        return this;
    }
    throw new NullPointerException("value cannot be null.");
}
/**
* For the given column named <code>name</code>,
* add (if it doesn't exist) to this tuple or update (if it
* exists) its value to <code>null</code>.
*
* @return return this tuple
*/
public Tuple putNull(String name)
{
    // a null value is stored like any other value; no size is added
    // to the estimate for it.
    this.insert(name, null);

    return this;
}
/**
* For the given column named <code>name</code>,
* add (if it doesn't exist) to this tuple or update (if it
* exists) its value to the given <code>value</code>.
*
* @return return this tuple
*/
public Tuple put(String name, byte value)
{
    // the value is stored boxed, as a Byte.
    this.insert(name, Byte.valueOf(value));

    // 16 bytes is what java.lang.instrument.Instrumentation
    // reports for a single Byte on a 64bit VM.
    final int boxed_size = 16;
    this.estimate_size_in_bytes = this.estimate_size_in_bytes + boxed_size;
    return this;
}
/**
* For the given column named <code>name</code>,
* add (if it doesn't exist) to this tuple or update (if it
* exists) its value to the given <code>value</code>.
*
* @param name name of the column.
* @param value the bytes to store, must not be <code>null</code>.
* @return return this tuple
* @throws NullPointerException if <code>value</code> is
* <code>null</code>; the tuple is left unchanged in that case.
*/
public Tuple put(String name, byte[] value)
{
    // reject null before touching the tuple, like the other
    // put(...) overloads for reference types; previously the null
    // was inserted first and value.length failed afterwards.
    if( value==null )
        throw new NullPointerException("value cannot be null.");
    this.insert(name, value);
    // add the length of the value plus 16 bytes
    // base.
    this.estimate_size_in_bytes += (value.length + 16);
    return this;
}
/**
* For the given column named <code>name</code>,
* add (if it doesn't exist) to this tuple or update (if it
* exists) its value to the given <code>value</code>.
*
* @return return this tuple
*/
public Tuple put(String name, short value)
{
    // the value is stored boxed, as a Short.
    this.insert(name, Short.valueOf(value));

    // 16 bytes is what java.lang.instrument.Instrumentation
    // reports for a single Short on a 64bit VM.
    final int boxed_size = 16;
    this.estimate_size_in_bytes = this.estimate_size_in_bytes + boxed_size;
    return this;
}
/**
* For the given column named <code>name</code>,
* add (if it doesn't exist) to this tuple or update (if it
* exists) its value to the given <code>value</code>.
*
* @return return this tuple
*/
public Tuple put(String name, int value)
{
    // the value is stored boxed, as an Integer.
    this.insert(name, Integer.valueOf(value));

    // 16 bytes is what java.lang.instrument.Instrumentation
    // reports for Integer.MAX_VALUE on a 64bit VM.
    final int boxed_size = 16;
    this.estimate_size_in_bytes = this.estimate_size_in_bytes + boxed_size;
    return this;
}
/**
* For the given column named <code>name</code>,
* add (if it doesn't exist) to this tuple or update (if it
* exists) its value to the given <code>value</code>.
*
* @return return this tuple
*/
public Tuple put(String name, long value)
{
    // the value is stored boxed, as a Long.
    this.insert(name, Long.valueOf(value));

    // 24 bytes is what java.lang.instrument.Instrumentation
    // reports for Long.MAX_VALUE on a 64bit VM.
    final int boxed_size = 24;
    this.estimate_size_in_bytes = this.estimate_size_in_bytes + boxed_size;
    return this;
}
/**
* For the given column named <code>name</code>,
* add (if it doesn't exist) to this tuple or update (if it
* exists) its value to the given <code>value</code>.
*
* @return return this tuple
*/
public Tuple put(String name, float value)
{
    // the value is stored boxed, as a Float.
    this.insert(name, Float.valueOf(value));

    // 16 bytes is what java.lang.instrument.Instrumentation
    // reports for Float.MAX_VALUE on a 64bit VM.
    final int boxed_size = 16;
    this.estimate_size_in_bytes = this.estimate_size_in_bytes + boxed_size;
    return this;
}
/**
* For the given column named <code>name</code>,
* add (if it doesn't exist) to this tuple or update (if it
* exists) its value to the given <code>value</code>.
*
* @return return this tuple
*/
public Tuple put(String name, double value)
{
    // the value is stored boxed, as a Double.
    this.insert(name, Double.valueOf(value));

    // 24 bytes is what java.lang.instrument.Instrumentation
    // reports for Double.MAX_VALUE on a 64bit VM.
    final int boxed_size = 24;
    this.estimate_size_in_bytes = this.estimate_size_in_bytes + boxed_size;
    return this;
}
/**
* For the given column named <code>name</code>,
* add (if it doesn't exist) to this tuple or update (if it
* exists) its value to the given <code>value</code>.
*
* @return return this tuple
*/
public Tuple put(String name, boolean value)
{
    // the value is stored boxed, as a Boolean.
    this.insert(name, Boolean.valueOf(value));

    // 16 bytes is what java.lang.instrument.Instrumentation
    // reports for a single Boolean on a 64bit VM.
    final int boxed_size = 16;
    this.estimate_size_in_bytes = this.estimate_size_in_bytes + boxed_size;
    return this;
}
/**
* For the given column named <code>name</code>,
* add (if it doesn't exist) to this tuple or update (if it
* exists) its value to the given <code>value</code>.
*
* @return return this tuple
*/
public Tuple put(String name, java.sql.Date value)
{
    if( value!=null )
    {
        this.insert(name, value);
        // 24 bytes is what java.lang.instrument.Instrumentation
        // reports for a single java.sql.Date on a 64bit VM.
        final int date_size = 24;
        this.estimate_size_in_bytes = this.estimate_size_in_bytes + date_size;
        return this;
    }
    throw new NullPointerException("value cannot be null.");
}
/**
* For the given column named <code>name</code>,
* add (if it doesn't exist) to this tuple or update (if it
* exists) its value to the given <code>value</code>.
*
* @return return this tuple
*/
public Tuple put(String name, Timestamp value)
{
    if( value!=null )
    {
        this.insert(name, value);
        // 32 bytes is what java.lang.instrument.Instrumentation
        // reports for a single java.sql.Timestamp on a 64bit VM.
        final int timestamp_size = 32;
        this.estimate_size_in_bytes = this.estimate_size_in_bytes + timestamp_size;
        return this;
    }
    throw new NullPointerException("value cannot be null.");
}
/**
* For the given column named <code>name</code>,
* add (if it doesn't exist) to this tuple or update (if it
* exists) its value to the given <code>value</code>.
*
* @return return this tuple
*/
public Tuple put(String name, Time value)
{
    if( value!=null )
    {
        this.insert(name, value);
        // 24 bytes is what java.lang.instrument.Instrumentation
        // reports for a single java.sql.Time on a 64bit VM.
        final int time_size = 24;
        this.estimate_size_in_bytes = this.estimate_size_in_bytes + time_size;
        return this;
    }
    throw new NullPointerException("value cannot be null.");
}
/**
* For the given column named <code>name</code>,
* add (if it doesn't exist) to this tuple or update (if it
* exists) its value to the given <code>value</code>.
*
* @return return this tuple
*/
public Tuple put(String name, String value)
{
    if( value!=null )
    {
        this.insert(name, value);
        // reference: http://www.javamex.com/tutorials/memory/string_memory_usage.shtml
        // Minimum String memory usage (bytes) = 8 * (int) ((((no chars) * 2) + 45) / 8)
        int chars = value.length();
        int eight_byte_words = ((chars * 2) + 45) / 8;
        this.estimate_size_in_bytes += 8 * eight_byte_words;
        return this;
    }
    throw new NullPointerException("value cannot be null.");
}
/**
* For the given column named <code>name</code>,
* add (if it doesn't exist) to this tuple or update (if it
* exists) its value to the given <code>value</code>.
*
* @return return this tuple
*/
public Tuple put(String name, CaseInsensitiveTreeMap value)
{
    // the parameter type already guarantees a case-insensitive TreeMap.
    this.insert(name, value);

    // reference: http://www.javamex.com/tutorials/memory/string_memory_usage.shtml
    // Minimum String memory usage (bytes) = 8 * (int) ((((no chars) * 2) + 45) / 8)
    final int assumed_chars = 64; // assume 64 chars string
    final int eight_byte_words = ((assumed_chars * 2) + 45) / 8;
    final long est_entry_side = 8 * eight_byte_words;

    // map overhead, plus one key and one value assumed to be about
    // the same size.
    final long map_overhead = 48;
    this.estimate_size_in_bytes += map_overhead + est_entry_side*2;
    return this;
}
/**
* For the given column named <code>name</code>,
* add (if it doesn't exist) to this tuple or update (if it
* exists) its value to the given <code>value</code>.
*
* @return return this tuple
*/
public Tuple put(String name, Writable value)
{
    if( value!=null )
    {
        this.insert(name, value);
        // the real size is unknown, 512 bytes is an estimate only.
        final int writable_size = 512;
        this.estimate_size_in_bytes = this.estimate_size_in_bytes + writable_size;
        return this;
    }
    throw new NullPointerException("value cannot be null.");
}
/**
 * Store <code>value</code> under the column named <code>name</code>:
 * the column is added if it doesn't exist in this tuple, and
 * updated if it does.
 * <p>
 *
 * The size estimate of this tuple grows by a flat 512 bytes.
 *
 * @return return this tuple
 * @throws NullPointerException if <code>value</code> is <code>null</code>.
 * @throws IllegalArgumentException if <code>value</code> is instance
 * of Map but not {@link CaseInsensitiveTreeMap}. Or when <code>value</code>
 * doesn't implement {@link Comparable}
 */
public Tuple put(String name, Serializable value)
{
    if( null==value )
    {
        throw new NullPointerException("value cannot be null.");
    }

    // the only accepted map implementation is CaseInsensitiveTreeMap
    final boolean unsupportedMap = value instanceof Map<?, ?> && !(value instanceof CaseInsensitiveTreeMap);
    if( unsupportedMap )
    {
        throw new IllegalArgumentException("The supported map type is only "+CaseInsensitiveTreeMap.class.getCanonicalName());
    }

    if( !(value instanceof Comparable<?>) )
    {
        throw new IllegalArgumentException(value.getClass().getCanonicalName() +
                " doesn't implement "+Comparable.class.getCanonicalName());
    }

    this.insert(name, value);

    // estimation only, a flat 512 bytes per value
    this.estimate_size_in_bytes += 512;
    return this;
}
/**
 * Read the column named <code>name</code> as a short.
 * <p>
 *
 * A <code>null</code> value yields -1.
 * <p>
 *
 * A non-<code>null</code> value that is not a short is
 * converted to short by Mobius; when the conversion is
 * not possible, {@link NumberFormatException} is thrown.
 */
public Short getShort(String name)
{
    final short fallback = -1;
    return this.getShort(name, fallback);
}
/**
 * Read the column named <code>name</code> as a short.
 * <p>
 *
 * A <code>null</code> value yields <code>default_value</code>.
 * <p>
 *
 * A non-<code>null</code> value that is not a short is
 * converted to short by Mobius; when the conversion is
 * not possible, {@link NumberFormatException} is thrown.
 */
public Short getShort(String name, short default_value)
{
    final Object result = get(SHORT_TYPE, name, default_value);
    return (Short)result;
}
/**
 * Read the column named <code>name</code> as an integer.
 * <p>
 *
 * A <code>null</code> value yields -1.
 * <p>
 *
 * A non-<code>null</code> value that is not an integer is
 * converted to integer by Mobius; when the conversion is
 * not possible, {@link NumberFormatException} is thrown.
 */
public Integer getInt(String name)
{
    final int fallback = -1;
    return this.getInt(name, fallback);
}
/**
 * Read the column named <code>name</code> as an integer.
 * <p>
 *
 * A <code>null</code> value yields <code>default_value</code>.
 * <p>
 *
 * A non-<code>null</code> value that is not an integer is
 * converted to integer by Mobius; when the conversion is
 * not possible, {@link NumberFormatException} is thrown.
 */
public Integer getInt(String name, int default_value)
{
    final Object result = get(INTEGER_TYPE, name, default_value);
    return (Integer)result;
}
/**
 * Read the column named <code>name</code> as a long.
 * <p>
 *
 * A <code>null</code> value yields -1L.
 * <p>
 *
 * A non-<code>null</code> value that is not a long is
 * converted to long by Mobius; when the conversion is
 * not possible, {@link NumberFormatException} is thrown.
 */
public Long getLong(String name)
{
    final Object result = get(LONG_TYPE, name, -1L);
    return (Long)result;
}
/**
 * Read the column named <code>name</code> as a long.
 * <p>
 *
 * A <code>null</code> value yields <code>default_value</code>.
 * <p>
 *
 * A non-<code>null</code> value that is not a long is
 * converted to long by Mobius; when the conversion is
 * not possible, {@link NumberFormatException} is thrown.
 */
public Long getLong(String name, long default_value)
{
    final Object result = get(LONG_TYPE, name, default_value);
    return (Long)result;
}
/**
 * Read the column named <code>name</code> as a float.
 * <p>
 *
 * A <code>null</code> value yields -1F.
 * <p>
 *
 * A non-<code>null</code> value that is not a float is
 * converted to float by Mobius; when the conversion is
 * not possible, {@link NumberFormatException} is thrown.
 */
public Float getFloat(String name)
{
    final Object result = get(FLOAT_TYPE, name, -1F);
    return (Float)result;
}
/**
 * Read the column named <code>name</code> as a float.
 * <p>
 *
 * A <code>null</code> value yields <code>default_value</code>.
 * <p>
 *
 * A non-<code>null</code> value that is not a float is
 * converted to float by Mobius; when the conversion is
 * not possible, {@link NumberFormatException} is thrown.
 */
public Float getFloat(String name, float default_value)
{
    final Object result = get(FLOAT_TYPE, name, default_value);
    return (Float)result;
}
/**
 * Read the column named <code>name</code> as a double.
 * <p>
 *
 * A <code>null</code> value yields -1D.
 * <p>
 *
 * A non-<code>null</code> value that is not a double is
 * converted to double by Mobius; when the conversion is
 * not possible, {@link NumberFormatException} is thrown.
 */
public Double getDouble(String name)
{
    final Object result = get(DOUBLE_TYPE, name, -1D);
    return (Double)result;
}
/**
 * Read the column named <code>name</code> as a double.
 * <p>
 *
 * A <code>null</code> value yields <code>default_value</code>.
 * <p>
 *
 * A non-<code>null</code> value that is not a double is
 * converted to double by Mobius; when the conversion is
 * not possible, {@link NumberFormatException} is thrown.
 */
public Double getDouble(String name, double default_value)
{
    final Object result = get(DOUBLE_TYPE, name, default_value);
    return (Double)result;
}
/**
 * Get the value of column named <code>name</code>.
 * <p>
 *
 * If the value is <code>null</code>, 0x00 is returned.
 * <p>
 *
 * If the value is not <code>null</code>, but it's
 * not a byte type, Mobius will try to convert it
 * to byte, otherwise, {@link NumberFormatException}
 * is thrown.
 */
public Byte getByte(String name)
{
    // The default must be a byte: a bare 0x00 literal is an int and would be
    // boxed into an Integer, which cannot be cast to Byte when it is handed
    // back as the result.
    return (Byte)get(BYTE_TYPE, name, (byte)0x00);
}
/**
 * Read the column named <code>name</code> as a byte.
 * <p>
 *
 * A <code>null</code> value yields <code>default_value</code>.
 * <p>
 *
 * A non-<code>null</code> value that is not a byte is
 * converted to byte by Mobius; when the conversion is
 * not possible, {@link NumberFormatException} is thrown.
 */
public Byte getByte(String name, byte default_value)
{
    final Object result = get(BYTE_TYPE, name, default_value);
    return (Byte)result;
}
/**
 * Read the column named <code>name</code> as a boolean.
 * <p>
 *
 * A <code>null</code> value yields <code>false</code>.
 * <p>
 *
 * A non-<code>null</code> value that is not a boolean is
 * converted by Mobius using {@link Boolean#valueOf(String)}.
 */
public Boolean getBoolean(String name)
{
    final Object result = get(BOOLEAN_TYPE, name, false);
    return (Boolean)result;
}
/**
 * Read the column named <code>name</code> as a boolean.
 * <p>
 *
 * A <code>null</code> value yields <code>default_value</code>.
 * <p>
 *
 * A non-<code>null</code> value that is not a boolean is
 * converted by Mobius using {@link Boolean#valueOf(String)}.
 */
public Boolean getBoolean(String name, boolean default_value)
{
    final Object result = get(BOOLEAN_TYPE, name, default_value);
    return (Boolean)result;
}
/**
 * Read the column named <code>name</code> as a string.
 * <p>
 *
 * A <code>null</code> value is returned as <code>null</code>.
 * <p>
 *
 * A non-<code>null</code> value that is not a string is
 * converted by Mobius using its <code>toString()</code> method.
 */
public String getString(String name)
{
    final Object result = get(STRING_TYPE, name, null);
    return (String)result;
}
/**
 * Read the column named <code>name</code> as a string.
 * <p>
 *
 * A <code>null</code> value yields <code>default_value</code>.
 * <p>
 *
 * A non-<code>null</code> value that is not a string is
 * converted by Mobius using its <code>toString()</code> method.
 */
public String getString(String name, String default_value)
{
    final Object result = get(STRING_TYPE, name, default_value);
    return (String)result;
}
/**
 * Read the column named <code>name</code> as a map.
 * <p>
 *
 * A <code>null</code> value is returned as <code>null</code>.
 * <p>
 *
 * A non-<code>null</code> value that is not a map makes
 * this method throw {@link IllegalArgumentException}.
 */
@SuppressWarnings("unchecked")
public Map<String, String> getMap(String name)
{
    final Object result = get(STRING_MAP_TYPE, name, null);
    return (Map<String, String>)result;
}
/**
 * Read the column named <code>name</code> as a date.
 * <p>
 *
 * A <code>null</code> value is returned as <code>null</code>.
 * <p>
 *
 * A non-<code>null</code> value that is not a date is
 * converted by Mobius using the
 * {@link java.sql.Date#valueOf(String)} method.
 */
public java.sql.Date getDate(String name)
{
    final Object result = get(DATE_TYPE, name, null);
    return (java.sql.Date)result;
}
/**
 * Read the column named <code>name</code> as a date.
 * <p>
 *
 * A <code>null</code> value yields <code>default_value</code>.
 * <p>
 *
 * A non-<code>null</code> value that is not a date is
 * converted by Mobius using the
 * {@link java.sql.Date#valueOf(String)} method.
 */
public java.sql.Date getDate(String name, java.sql.Date default_value)
{
    final Object result = get(DATE_TYPE, name, default_value);
    return (java.sql.Date)result;
}
/**
 * Read the column named <code>name</code> as a timestamp.
 * <p>
 *
 * A <code>null</code> value is returned as <code>null</code>.
 * <p>
 *
 * A non-<code>null</code> value that is not a timestamp is
 * converted by Mobius using the
 * {@link java.sql.Timestamp#valueOf(String)} method.
 */
public Timestamp getTimestamp(String name)
{
    final Object result = get(TIMESTAMP_TYPE, name, null);
    return (Timestamp)result;
}
/**
 * Read the column named <code>name</code> as a timestamp.
 * <p>
 *
 * A <code>null</code> value yields <code>default_value</code>.
 * <p>
 *
 * A non-<code>null</code> value that is not a timestamp is
 * converted by Mobius using the
 * {@link java.sql.Timestamp#valueOf(String)} method.
 */
public Timestamp getTimestamp(String name, java.sql.Timestamp default_value)
{
    final Object result = get(TIMESTAMP_TYPE, name, default_value);
    return (Timestamp)result;
}
/**
 * Read the column named <code>name</code> as a time.
 * <p>
 *
 * A <code>null</code> value is returned as <code>null</code>.
 * <p>
 *
 * A non-<code>null</code> value that is not a time is
 * converted by Mobius using the
 * {@link java.sql.Time#valueOf(String)} method.
 */
public Time getTime(String name)
{
    final Object result = get(TIME_TYPE, name, null);
    return (Time)result;
}
/**
 * Read the column named <code>name</code> as a time.
 * <p>
 *
 * A <code>null</code> value yields <code>default_value</code>.
 * <p>
 *
 * A non-<code>null</code> value that is not a time is
 * converted by Mobius using the
 * {@link java.sql.Time#valueOf(String)} method.
 */
public Time getTime(String name, Time default_value)
{
    final Object result = get(TIME_TYPE, name, default_value);
    return (Time)result;
}
/**
 * Return the value stored at position <code>index</code>
 * of this tuple, without going through the column name.
 */
public Object get(int index)
{
    final Object cell = this.values.get(index);
    return cell;
}
/**
 * Get the value of column named <code>name</code>.
 * <p>
 *
 * <code>name</code> can be a plain column ID, or a map
 * style ID in the form of <code>ID.MAP_KEY</code>. In the
 * latter case, the column <code>ID</code> must hold a
 * {@link CaseInsensitiveTreeMap}, and the value mapped to
 * <code>MAP_KEY</code> in that map is returned.
 *
 * @param name the name of a column.
 * @return value of the column.
 * @throws IllegalArgumentException if <code>name</code> is a
 * map style ID but the value of the column is not a
 * {@link CaseInsensitiveTreeMap} (including when the value
 * is <code>null</code>).
 */
public Object get(String name)
{
    TupleColumnName tcn = TupleColumnName.valueOf(lowerCase(name));
    int idx = check_in_schema(tcn.getID());
    Object value = this.values.get(idx);

    if( tcn.getMapKey()==null )
    {
        // not using map style name to access the value
        return value;
    }

    // user is referencing to a value of a map key.
    if( value instanceof CaseInsensitiveTreeMap )
    {
        return ((CaseInsensitiveTreeMap)value).get(tcn.getMapKey());
    }

    // the column can hold null, don't dereference the value blindly
    // while building the message, or the real error is hidden by a
    // NullPointerException.
    String actualType = value==null ? "null" : value.getClass().getCanonicalName();
    throw new IllegalArgumentException("The type of column ["+tcn.getID()+"] is not "+CaseInsensitiveTreeMap.class.getCanonicalName()+" but "+actualType+
            ", the given ID ["+name+"] is a map style ID and cannot be applied to this column.");
}
/**
 * Return a new instance of {@link Tuple} which
 * contains the exact same data of this one.
 * <p>
 *
 * The schema (column name to position mapping), the values
 * and the estimated size in memory are copied into the new
 * instance. The values themselves are not cloned, both tuples
 * reference the same value objects.
 */
@Override
public Tuple clone()
{
    Tuple clone = new Tuple();
    clone.namesToIdxMapping = new HashMap<String, Integer>();
    clone.values = new ArrayList<Object>(this.values.size());

    // fulfill the values with null, so each value can be
    // set to its position directly afterward.
    for( int i=0;i<this.values.size();i++ )
    {
        clone.values.add(null);
    }

    for( Map.Entry<String, Integer> column:this.namesToIdxMapping.entrySet() )
    {
        int idx = column.getValue();
        clone.namesToIdxMapping.put(column.getKey(), idx);
        clone.values.set(idx, this.get(idx));
    }

    // the clone holds the same data, so it has the same footprint;
    // without this, the clone reports only the initial estimate.
    clone.estimate_size_in_bytes = this.estimate_size_in_bytes;
    return clone;
}
/**
 * The hash code of a tuple is the sum of the hash codes
 * of its non-<code>null</code> values.
 */
@Override
public int hashCode()
{
    int sum = 0;
    for( Object cell:this.values )
    {
        // null values don't contribute to the hash code
        if( cell!=null )
        {
            sum += cell.hashCode();
        }
    }
    return sum;
}
/**
 * Tell whether <code>type</code> is one of the numerical types:
 * {@link #BYTE_TYPE}, {@link #SHORT_TYPE},
 * {@link #INTEGER_TYPE}, {@link #LONG_TYPE},
 * {@link #FLOAT_TYPE}, or {@link #DOUBLE_TYPE}.
 * <p>
 *
 * The test is a range check from {@link #BYTE_TYPE} to
 * {@link #DOUBLE_TYPE}, both inclusive.
 *
 * @return <code>true</code> if <code>type</code> is within
 * the above types, <code>false</code> otherwise.
 */
public static boolean isNumericalType(byte type)
{
    if( type<Tuple.BYTE_TYPE )
    {
        return false;
    }
    return type<=Tuple.DOUBLE_TYPE;
}
/**
 * Tell whether <code>type</code> is one of the date types:
 * {@link #TIME_TYPE}, {@link #DATE_TYPE}, or
 * {@link #TIMESTAMP_TYPE}.
 *
 * @return <code>true</code> if <code>type</code> is within
 * the above types, <code>false</code> otherwise.
 */
public static boolean isDateType(byte type)
{
    if( type==Tuple.DATE_TYPE )
    {
        return true;
    }
    if( type==Tuple.TIMESTAMP_TYPE )
    {
        return true;
    }
    return type==Tuple.TIME_TYPE;
}
/**
 * Set the mutable flag of this tuple to <code>isMutable</code>.
 */
private void setMutable(boolean isMutable)
{
    final boolean flag = isMutable;
    this.isMutable = flag;
}
/**
 * Build an immutable copy of <code>t</code>.
 * <p>
 *
 * The returned tuple is a new instance holding the same
 * data as <code>t</code>, but it rejects all modification
 * requests, such as {@link Tuple#insert(String, Object)}.
 * <p>
 *
 * The given <code>t</code> tuple itself is left as is, it
 * is still a mutable {@linkplain Tuple}.
 */
public static Tuple immutable(Tuple t)
{
    Tuple frozen = t.clone();
    frozen.setMutable(false);
    return frozen;
}
/**
 * Combine two tuples into a new one.
 * <p>
 *
 * The returned tuple holds all the columns of
 * <code>t1</code> and <code>t2</code>. For the columns
 * that appear in both, the values from <code>t2</code>
 * win over the values from <code>t1</code>.
 * <p>
 *
 * A <code>null</code> argument is skipped; if both are
 * <code>null</code>, an empty tuple is returned.
 */
public static Tuple merge(Tuple t1, Tuple t2)
{
    Tuple merged = new Tuple();

    // t1 goes first, so the columns of t2 overwrite the ones of t1.
    for( Tuple source:new Tuple[]{t1, t2} )
    {
        if( source==null )
        {
            continue;
        }
        for( String column:source.getSchema() )
        {
            merged.insert(column, source.get(column));
        }
    }
    return merged;
}
/**
 * Convert this {@link Tuple} into text, the delimiter is specified by
 * "mobius.tuple.tostring.delimiter" (default is tab).
 * <p>
 *
 * If an ordering has been set through
 * {@link #setToStringOrdering(String[])}, only the columns in
 * that ordering are emitted, in that order. Otherwise, all the
 * values are emitted in the order they are stored.
 * <p>
 *
 * <code>null</code> values are emitted as empty string.
 */
@Override
public String toString()
{
    StringBuilder sb = new StringBuilder();
    if( this.toStringOrdering!=null && this.toStringOrdering.length>0 )
    {
        // the delimiter goes between the emitted columns, so the last
        // position is defined by the ordering, not by the number of
        // values in this tuple.
        final int last = this.toStringOrdering.length-1;
        for( int i=0;i<=last;i++ )
        {
            Object aValue = this.get(this.toStringOrdering[i]);
            if( aValue!=null )
                sb.append(aValue.toString());
            if( i<last )
                sb.append(Tuple._DELIMITER);
        }
    }
    else
    {
        final int last = this.values.size()-1;
        for( int i=0;i<=last;i++ )
        {
            Object aValue = this.values.get(i);
            if( aValue!=null )
                sb.append(aValue.toString());
            if( i<last )
                sb.append(Tuple._DELIMITER);
        }
    }
    return sb.toString();
}
/**
 * Compare if the <code>obj</code> equals to
 * this tuple or not.
 * <p>
 *
 * Equals only when the class of this tuple and the
 * <code>obj</code> is the same, both share same
 * schema, and the values of the columns are the same.
 * <p>
 *
 * <code>null</code> is never equal to a tuple.
 */
@Override
public boolean equals(Object obj)
{
    if( obj==this )
        return true;

    // equals(null) must answer false instead of throwing
    if( obj==null || !obj.getClass().equals(this.getClass()) )
        return false;

    Tuple that = (Tuple)obj;
    if( !this.namesToIdxMapping.keySet().equals(that.namesToIdxMapping.keySet()) )
        return false;

    // same schema, test the value one by one
    for( String name:this.namesToIdxMapping.keySet() )
    {
        Object v1 = this.get(name);
        Object v2 = that.get(name);
        if( v1==null )
        {
            // both null is considered equal, move on to the next
            if( v2!=null )
                return false;
        }
        else if( !v1.equals(v2) )
        {
            // covers v2==null as well as different values
            return false;
        }
    }
    return true;
}
/**
 * Return the schema of this tuple.
 * <p>
 *
 * The returned array holds the column names, each one
 * placed at the position it is mapped to in this tuple.
 */
public String[] getSchema()
{
    String[] columns = new String[this.namesToIdxMapping.size()];
    for( Map.Entry<String, Integer> entry:this.namesToIdxMapping.entrySet() )
    {
        // the mapped value is the index of the column
        columns[entry.getValue()] = entry.getKey();
    }
    return columns;
}
/**
 * Build a tuple out of the text <code>source</code>.
 * <p>
 *
 * The <code>source</code> is split by the given <code>delimiter</code>
 * (plain text, not a regular expression), and the resulting tokens are
 * used as the values of the returned tuple: the i-th token becomes the
 * value of the i-th column of <code>schema</code>.
 * <p>
 *
 * If there are fewer tokens than columns in <code>schema</code>, the
 * remaining columns are set to null.
 * <p>
 *
 * If the number of values in the split <code>source</code> is greater
 * than the length of <code>schema</code>, <code>IDX_$i</code> is used
 * as the name of those values, where <code>$i</code> starts from the
 * length of <code>schema</code>.
 */
public static Tuple valueOf(Text source, String[] schema, String delimiter)
{
    Tuple result = new Tuple();
    List<String> fields = Util.nonRegexSplit(source.toString(), delimiter);

    final int named = schema.length;
    final int available = fields.size();
    final int total = Math.max(named, available);
    for( int pos=0;pos<total;pos++ )
    {
        if( pos>=named )
        {
            // extra value that exceeds the length of the user
            // specified schema, put in the tail.
            result.put("IDX_"+pos, fields.get(pos));
        }
        else if( pos<available )
        {
            result.put(schema[pos], fields.get(pos));
        }
        else
        {
            // no token for this column
            result.putNull(schema[pos]);
        }
    }
    return result;
}
/**
 * Return how many bytes this tuple is estimated
 * to occupy in memory.
 * <p>
 *
 * The estimation is based on 64bit VM.
 */
public long getEstimatedSizeInMemory()
{
    final long estimated = this.estimate_size_in_bytes;
    return estimated;
}
/**
 * Set the columns, and their ordering, to be used by
 * {@link #toString()}. The given array is stored as is,
 * no copy is made.
 */
public void setToStringOrdering(String[] columns)
{
    final String[] ordering = columns;
    this.toStringOrdering = ordering;
}
/**
 * Tell whether this tuple has at least one column
 * in its schema.
 */
public boolean hasSchema()
{
    if( this.namesToIdxMapping==null )
    {
        return false;
    }
    return !this.namesToIdxMapping.keySet().isEmpty();
}
/**
 * The name of a tuple column, split into the column
 * ID and an optional map key.
 * <p>
 *
 * The column name format is specified as a
 * regular expression in {@link TupleColumnName#COLUMN_NAME_PATTERN}.
 */
public static final class TupleColumnName
{
    /**
     * Column name format of a tuple.
     * <p>
     * The format is: <code>([\\p{Graph}&&[^\\.]]+)(\\.([\\p{Graph}&&[^\\.]]+))?</code>
     * <p>
     */
    public static final Pattern COLUMN_NAME_PATTERN = Pattern.compile("([\\p{Graph}&&[^\\.]]+)(\\.([\\p{Graph}&&[^\\.]]+))?");

    /**
     * the ID of the column, always set.
     */
    private String id;

    /**
     * only set for the map style names. For example,
     * A.B means this tuple has a column named "A" which
     * is a Map type, and user is trying to access the
     * value of key "B". <code>null</code> for plain names.
     */
    private String mapKey;

    /**
     * the instances created so far, keyed by the column
     * name they were created from.
     */
    private static Map<String, TupleColumnName> tupleColumnNames = new HashMap<String, TupleColumnName>();

    /**
     * convert the <code>columnName</code> into a {@link TupleColumnName}.
     * <p>
     *
     * The part before the first dot is the column ID, the
     * part after it (if any) is the map key. The same instance
     * is returned for the same <code>columnName</code>.
     *
     * @throws IllegalArgumentException if <code>columnName</code>
     * is <code>null</code>, blank, or starts or ends with its first dot.
     */
    public synchronized static TupleColumnName valueOf(String columnName)
    {
        if( columnName==null || columnName.trim().isEmpty() )
        {
            throw new IllegalArgumentException("column name cannot be null nor empty string.");
        }

        TupleColumnName cached = tupleColumnNames.get(columnName);
        if( cached!=null )
        {
            return cached;
        }

        final int dotIdx = columnName.indexOf(".");
        final boolean startsWithDot = dotIdx==0;
        final boolean nothingAfterDot = dotIdx>0 && dotIdx+1==columnName.length();
        if( startsWithDot || nothingAfterDot )
        {
            throw new IllegalArgumentException("Invalid format of Tuple column name:["+columnName+"], please refer the correct format in {@link Tuple#COLUMN_NAME_PATTERN}");
        }

        TupleColumnName parsed = new TupleColumnName();
        if( dotIdx<0 )
        {
            // plain name, no map key
            parsed.id = columnName;
            parsed.mapKey = null;
        }
        else
        {
            // map style name, split on the first dot
            parsed.id = columnName.substring(0, dotIdx);
            parsed.mapKey = columnName.substring(dotIdx+1);
        }

        tupleColumnNames.put(columnName, parsed);
        return parsed;
    }

    /**
     * Get the column ID.
     */
    public String getID()
    {
        return this.id;
    }

    /**
     * Get the map key, or <code>null</code> if the name
     * is not a map style name.
     * <p>
     *
     * If the column type is map, user can use, for
     * example, <code>ID.MAP_KEY</code> to access a column
     * named <code>ID</code>, which is a map, and then use
     * <code>MAP_KEY</code> as the key to get the value.
     */
    public String getMapKey()
    {
        return this.mapKey;
    }
}
}