Source Code of backtype.storm.contrib.hbase.utils.TupleTableConfig

package backtype.storm.contrib.hbase.utils;

import java.io.Serializable;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Set;
import java.util.TreeMap;

import org.apache.hadoop.hbase.client.Increment;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

import backtype.storm.tuple.Tuple;

/**
* Configuration for Storm {@link Tuple} to HBase serialization.
*/
@SuppressWarnings("serial")
public class TupleTableConfig implements Serializable {

  public static final long DEFAULT_INCREMENT = 1L;

  private String tableName;
  protected String tupleRowKeyField;
  protected String tupleTimestampField;
  protected Map<String, Set<String>> columnFamilies;
  private boolean batch = true;
  protected boolean writeToWAL = true;
  private long writeBufferSize = 0L;

  /**
   * Initialize configuration
   * @param table The HBase table name
   * @param rowKeyField The {@link Tuple} field used to set the rowKey
   */
  public TupleTableConfig(final String table, final String rowKeyField) {
    this(table, rowKeyField, "");
  }

  /**
   * Initialize configuration
   * @param table The HBase table name
   * @param rowKeyField The {@link Tuple} field used to set the rowKey
   * @param timestampField The {@link Tuple} field used to set the timestamp
   */
  public TupleTableConfig(final String table, final String rowKeyField, final String timestampField) {
    this.tableName = table;
    this.tupleRowKeyField = rowKeyField;
    this.tupleTimestampField = timestampField;
    this.columnFamilies = new HashMap<String, Set<String>>();
  }

  /**
   * Add a column family and column qualifier to be extracted from the tuple
   * @param columnFamily The column family name
   * @param columnQualifier The column qualifier name
   */
  public void addColumn(final String columnFamily, final String columnQualifier) {
    Set<String> columns = this.columnFamilies.get(columnFamily);

    if (columns == null) {
      columns = new HashSet<String>();
    }
    columns.add(columnQualifier);

    this.columnFamilies.put(columnFamily, columns);
  }
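
  /*
   * A minimal usage sketch (not part of the original source): configuring the
   * mapping for a hypothetical "shorturl" table whose tuples carry "shortid",
   * "url" and "date" fields:
   *
   *   TupleTableConfig config = new TupleTableConfig("shorturl", "shortid");
   *   config.addColumn("data", "url");
   *   config.addColumn("data", "date");
   */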

  /**
   * Creates an HBase {@link Put} from a Storm {@link Tuple}
   * @param tuple The {@link Tuple}
   * @return {@link Put}
   */
  public Put getPutFromTuple(final Tuple tuple) {
    byte[] rowKey = Bytes.toBytes(tuple.getStringByField(tupleRowKeyField));

    long ts = 0;
    if (!tupleTimestampField.equals("")) {
      ts = tuple.getLongByField(tupleTimestampField);
    }

    Put p = new Put(rowKey);
    p.setWriteToWAL(writeToWAL);

    if (columnFamilies.size() > 0) {
      for (String cf : columnFamilies.keySet()) {
        byte[] cfBytes = Bytes.toBytes(cf);
        for (String cq : columnFamilies.get(cf)) {
          byte[] cqBytes = Bytes.toBytes(cq);
          byte[] val = Bytes.toBytes(tuple.getStringByField(cq));

          if (ts > 0) {
            p.add(cfBytes, cqBytes, ts, val);
          } else {
            p.add(cfBytes, cqBytes, val);
          }
        }
      }
    }

    return p;
  }
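
  /*
   * Sketch of how a bolt might apply the Put, assuming the pre-1.0 HTable
   * client API this class targets; "conf" (an HBase Configuration) and
   * "tuple" are hypothetical:
   *
   *   HTable table = new HTable(conf, config.getTableName());
   *   table.put(config.getPutFromTuple(tuple));
   *   table.close();
   */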

  /**
   * Creates an HBase {@link Increment} from a Storm {@link Tuple}
   * @param tuple The {@link Tuple}
   * @param increment The amount to increment the counter by
   * @return {@link Increment}
   */
  public Increment getIncrementFromTuple(final Tuple tuple, final long increment) {
    byte[] rowKey = Bytes.toBytes(tuple.getStringByField(tupleRowKeyField));

    Increment inc = new Increment(rowKey);
    inc.setWriteToWAL(writeToWAL);

    if (columnFamilies.size() > 0) {
      for (String cf : columnFamilies.keySet()) {
        byte[] cfBytes = Bytes.toBytes(cf);
        for (String cq : columnFamilies.get(cf)) {
          byte[] val;
          try {
            val = Bytes.toBytes(tuple.getStringByField(cq));
          } catch (IllegalArgumentException ex) {
            // If cq isn't a tuple field, fall back to using cq itself as the
            // counter's column qualifier
            val = Bytes.toBytes(cq);
          }
          inc.addColumn(cfBytes, val, increment);
        }
      }
    }

    return inc;
  }
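
  /*
   * Sketch (same hypothetical "table" and "tuple" as above): incrementing
   * counters for a tuple by the default amount:
   *
   *   Increment inc =
   *       config.getIncrementFromTuple(tuple, TupleTableConfig.DEFAULT_INCREMENT);
   *   table.increment(inc);
   */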

  /**
   * Increments the counter for the given family and column by the specified amount
   * <p>
   * If the family and column already exist in the Increment, the counter value is increased by the
   * specified amount rather than overwritten, as it would be by HBase's
   * {@link Increment#addColumn(byte[], byte[], long)} method
   * @param inc The {@link Increment} to update
   * @param family The column family
   * @param qualifier The column qualifier
   * @param amount The amount to increment the counter by
   */
  public static void addIncrement(Increment inc, final byte[] family, final byte[] qualifier,
      final Long amount) {

    NavigableMap<byte[], Long> qualifiers = inc.getFamilyMap().get(family);
    if (qualifiers == null) {
      qualifiers = new TreeMap<byte[], Long>(Bytes.BYTES_COMPARATOR);
    }

    // If the qualifier already exists, add to its pending counter value
    Long counter = qualifiers.get(qualifier);
    if (counter == null) {
      counter = 0L;
    }
    qualifiers.put(qualifier, amount + counter);

    inc.getFamilyMap().put(family, qualifiers);
  }
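
  /*
   * Sketch: aggregating several counts into one Increment. Unlike
   * Increment#addColumn, a second call for the same family/qualifier adds to
   * the pending amount rather than replacing it ("example-row" is illustrative):
   *
   *   Increment inc = new Increment(Bytes.toBytes("example-row"));
   *   TupleTableConfig.addIncrement(inc, Bytes.toBytes("data"), Bytes.toBytes("clicks"), 1L);
   *   TupleTableConfig.addIncrement(inc, Bytes.toBytes("data"), Bytes.toBytes("clicks"), 2L);
   *   // the pending counter for data:clicks is now 3
   */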

  /**
   * @return the tableName
   */
  public String getTableName() {
    return tableName;
  }

  /**
   * @return Whether batch mode is enabled
   */
  public boolean isBatch() {
    return batch;
  }

  /**
   * @param batch Whether to enable HBase's client-side write buffer.
   *          <p>
   *          When enabled, your bolt stores put operations locally until the write buffer is
   *          full, so they can be sent to HBase in a single RPC call. When disabled, each put
   *          operation is effectively its own RPC and is sent straight to HBase. As your bolt
   *          may process thousands of values per second, enabling the write buffer is
   *          recommended.
   *          <p>
   *          Enabled by default
   */
  public void setBatch(boolean batch) {
    this.batch = batch;
  }

  /**
   * @param writeToWAL Whether to write to HBase's edit log (WAL).
   *          <p>
   *          Setting this to false means fewer operations when writing to HBase, and hence
   *          better performance, but any changes that haven't been flushed to a store file
   *          will be lost in the event of HBase failure
   *          <p>
   *          Enabled by default
   */
  public void setWriteToWAL(boolean writeToWAL) {
    this.writeToWAL = writeToWAL;
  }

  /**
   * @return True if writes go to HBase's edit log (WAL), false if not
   */
  public boolean isWriteToWAL() {
    return writeToWAL;
  }

  /**
   * @param writeBufferSize Overrides the client-side write buffer size.
   *          <p>
   *          By default the write buffer is 2 MB (2097152 bytes). If you are storing larger
   *          data, consider increasing this value so your bolt can efficiently group a larger
   *          number of records into each RPC
   *          <p>
   *          Overrides the write buffer size set in your hbase-site.xml, i.e.
   *          <code>hbase.client.write.buffer</code>
   */
  public void setWriteBufferSize(long writeBufferSize) {
    this.writeBufferSize = writeBufferSize;
  }
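
  /*
   * Sketch: a throughput-oriented configuration (values are illustrative
   * only):
   *
   *   config.setBatch(true);                       // buffer puts client-side
   *   config.setWriteBufferSize(8 * 1024 * 1024);  // 8 MB buffer
   *   config.setWriteToWAL(false);                 // faster, but less durable
   */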

  /**
   * @return the writeBufferSize
   */
  public long getWriteBufferSize() {
    return writeBufferSize;
  }

  /**
   * @return A Set of configured column families
   */
  public Set<String> getColumnFamilies() {
    return this.columnFamilies.keySet();
  }

  /**
   * @return the tupleRowKeyField
   */
  public String getTupleRowKeyField() {
    return tupleRowKeyField;
  }
}