package backtype.storm.contrib.hbase.utils;
import java.io.Serializable;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Set;
import java.util.TreeMap;
import org.apache.hadoop.hbase.client.Increment;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import backtype.storm.tuple.Tuple;
/**
* Configuration for Storm {@link Tuple} to HBase serialization.
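 * <p>
 * A minimal usage sketch (table, field, and column names are illustrative):
 * <pre>
 * TupleTableConfig config = new TupleTableConfig("shorturls", "shortid");
 * config.addColumn("data", "url");   // store the tuple's "url" field in data:url
 * config.setBatch(true);             // buffer puts client-side (the default)
 * config.setWriteToWAL(true);        // write to the WAL (the default)
 * </pre>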
*/
@SuppressWarnings("serial")
public class TupleTableConfig implements Serializable {
public static final long DEFAULT_INCREMENT = 1L;
private String tableName;
protected String tupleRowKeyField;
protected String tupleTimestampField;
protected Map<String, Set<String>> columnFamilies;
private boolean batch = true;
protected boolean writeToWAL = true;
private long writeBufferSize = 0L;
/**
* Initialize configuration
* @param table The HBase table name
* @param rowKeyField The {@link Tuple} field used to set the rowKey
*/
public TupleTableConfig(final String table, final String rowKeyField) {
this.tableName = table;
this.tupleRowKeyField = rowKeyField;
this.tupleTimestampField = "";
this.columnFamilies = new HashMap<String, Set<String>>();
}
/**
* Initialize configuration
* @param table The HBase table name
* @param rowKeyField The {@link Tuple} field used to set the rowKey
* @param timestampField The {@link Tuple} field used to set the timestamp
*/
public TupleTableConfig(final String table, final String rowKeyField, final String timestampField) {
this.tableName = table;
this.tupleRowKeyField = rowKeyField;
this.tupleTimestampField = timestampField;
this.columnFamilies = new HashMap<String, Set<String>>();
}
/**
   * Add a column family and column qualifier to be extracted from each tuple
* @param columnFamily The column family name
* @param columnQualifier The column qualifier name
*/
public void addColumn(final String columnFamily, final String columnQualifier) {
Set<String> columns = this.columnFamilies.get(columnFamily);
if (columns == null) {
columns = new HashSet<String>();
}
columns.add(columnQualifier);
this.columnFamilies.put(columnFamily, columns);
}
/**
   * Creates an HBase {@link Put} from a Storm {@link Tuple}
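   * <p>
   * For example (field names are illustrative): with {@code addColumn("data", "url")}
   * configured, a tuple whose rowKey field is {@code "abc"} and whose {@code url} field is
   * {@code "http://example.com"} yields a Put equivalent to:
   * <pre>
   * Put p = new Put(Bytes.toBytes("abc"));
   * p.add(Bytes.toBytes("data"), Bytes.toBytes("url"), Bytes.toBytes("http://example.com"));
   * </pre>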
* @param tuple The {@link Tuple}
* @return {@link Put}
*/
public Put getPutFromTuple(final Tuple tuple) {
byte[] rowKey = Bytes.toBytes(tuple.getStringByField(tupleRowKeyField));
long ts = 0;
    // Use the tuple's timestamp field, if configured, for the Put's cells
    if (!tupleTimestampField.isEmpty()) {
      ts = tuple.getLongByField(tupleTimestampField);
    }
    Put p = new Put(rowKey);
    p.setWriteToWAL(writeToWAL);
    if (!columnFamilies.isEmpty()) {
      for (Map.Entry<String, Set<String>> cf : columnFamilies.entrySet()) {
        byte[] cfBytes = Bytes.toBytes(cf.getKey());
        for (String cq : cf.getValue()) {
          byte[] cqBytes = Bytes.toBytes(cq);
          // Each configured qualifier is also the tuple field whose value is stored
          byte[] val = Bytes.toBytes(tuple.getStringByField(cq));
          if (ts > 0) {
            p.add(cfBytes, cqBytes, ts, val);
          } else {
            p.add(cfBytes, cqBytes, val);
          }
        }
      }
    }
return p;
}
/**
   * Creates an HBase {@link Increment} from a Storm {@link Tuple}
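   * <p>
   * Note that the counter's qualifier is the tuple's <i>value</i> for the configured
   * qualifier field, falling back to the qualifier name itself when the tuple has no such
   * field. A sketch with illustrative names:
   * <pre>
   * // with addColumn("counters", "date") configured and a tuple field date="20120816",
   * // the counter at counters:20120816 is incremented by DEFAULT_INCREMENT
   * Increment inc = config.getIncrementFromTuple(tuple, TupleTableConfig.DEFAULT_INCREMENT);
   * </pre>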
* @param tuple The {@link Tuple}
* @param increment The amount to increment the counter by
* @return {@link Increment}
*/
public Increment getIncrementFromTuple(final Tuple tuple, final long increment) {
byte[] rowKey = Bytes.toBytes(tuple.getStringByField(tupleRowKeyField));
Increment inc = new Increment(rowKey);
inc.setWriteToWAL(writeToWAL);
    if (!columnFamilies.isEmpty()) {
      for (Map.Entry<String, Set<String>> cf : columnFamilies.entrySet()) {
        byte[] cfBytes = Bytes.toBytes(cf.getKey());
        for (String cq : cf.getValue()) {
          byte[] val;
          try {
            val = Bytes.toBytes(tuple.getStringByField(cq));
          } catch (IllegalArgumentException ex) {
            // If cq isn't a tuple field, use cq itself as the counter qualifier
            // instead of a tuple value
            val = Bytes.toBytes(cq);
          }
          // The counter's qualifier is the value resolved above, not cq directly
          inc.addColumn(cfBytes, val, increment);
        }
      }
    }
return inc;
}
/**
* Increment the counter for the given family and column by the specified amount
* <p>
   * If the family and qualifier already exist in the Increment, the counter value is
   * incremented by the specified amount rather than overwritten, as it would be by HBase's
   * {@link Increment#addColumn(byte[], byte[], long)} method
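   * <p>
   * For example, two calls for the same cell accumulate into a single counter:
   * <pre>
   * Increment inc = new Increment(Bytes.toBytes("rowKey"));
   * TupleTableConfig.addIncrement(inc, Bytes.toBytes("cf"), Bytes.toBytes("cq"), 2L);
   * TupleTableConfig.addIncrement(inc, Bytes.toBytes("cf"), Bytes.toBytes("cq"), 3L);
   * // inc now carries one counter of 5 for cf:cq
   * </pre>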
* @param inc The {@link Increment} to update
* @param family The column family
* @param qualifier The column qualifier
* @param amount The amount to increment the counter by
*/
public static void addIncrement(Increment inc, final byte[] family, final byte[] qualifier,
final Long amount) {
    NavigableMap<byte[], Long> counters = inc.getFamilyMap().get(family);
    if (counters == null) {
      counters = new TreeMap<byte[], Long>(Bytes.BYTES_COMPARATOR);
    }
    // If the qualifier already has a counter, add to it instead of replacing it
    Long counter = counters.get(qualifier);
    if (counter == null) {
      counter = 0L;
    }
    counters.put(qualifier, amount + counter);
    inc.getFamilyMap().put(family, counters);
}
/**
* @return the tableName
*/
public String getTableName() {
return tableName;
}
/**
* @return Whether batch mode is enabled
*/
public boolean isBatch() {
return batch;
}
/**
* @param batch Whether to enable HBase's client-side write buffer.
* <p>
   * When enabled, your bolt will buffer put operations locally until the write buffer is
   * full, so they can be sent to HBase in a single RPC call. When disabled, each put
   * operation is effectively its own RPC and is sent straight to HBase. As your bolt can
   * process thousands of tuples per second, it is recommended that the write buffer be
   * enabled.
* <p>
* Enabled by default
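   * <p>
   * For example:
   * <pre>
   * config.setBatch(true); // buffer puts and flush them to HBase in bulk
   * </pre>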
*/
public void setBatch(boolean batch) {
this.batch = batch;
}
/**
* @param writeToWAL Sets whether to write to HBase's edit log.
* <p>
   * Setting this to false means fewer operations when writing to HBase, and hence better
   * performance, but any changes that have not been flushed to a store file will be lost
   * in the event of an HBase failure
* <p>
* Enabled by default
*/
public void setWriteToWAL(boolean writeToWAL) {
this.writeToWAL = writeToWAL;
}
/**
   * @return True if writes go to HBase's edit log (WAL), false otherwise
*/
public boolean isWriteToWAL() {
return writeToWAL;
}
/**
* @param writeBufferSize Overrides the client-side write buffer size.
* <p>
   * By default the write buffer size is 2 MB (2097152 bytes). If you are storing larger
   * data, consider increasing this value so your bolt can efficiently group a larger
   * number of records into each RPC
   * <p>
   * Overrides the write buffer size set in hbase-site.xml via
   * <code>hbase.client.write.buffer</code>
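   * <p>
   * For example, to raise the buffer from the 2 MB default to 16 MB:
   * <pre>
   * config.setWriteBufferSize(16 * 1024 * 1024); // 16777216 bytes
   * </pre>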
*/
public void setWriteBufferSize(long writeBufferSize) {
this.writeBufferSize = writeBufferSize;
}
/**
* @return the writeBufferSize
*/
public long getWriteBufferSize() {
return writeBufferSize;
}
/**
* @return A Set of configured column families
*/
public Set<String> getColumnFamilies() {
return this.columnFamilies.keySet();
}
/**
* @return the tupleRowKeyField
*/
public String getTupleRowKeyField() {
return tupleRowKeyField;
}
}