Package havrobase

Source Code of havrobase.HAB

package havrobase;

import avrobase.AvroBaseException;
import avrobase.AvroBaseImpl;
import avrobase.AvroFormat;
import avrobase.Row;
import avrobase.TimestampGenerator;
import com.google.common.base.Supplier;
import com.google.inject.Inject;
import com.google.inject.internal.Nullable;
import com.google.inject.name.Named;
import org.apache.avro.Schema;
import org.apache.avro.specific.SpecificRecord;
import org.apache.commons.lang.NotImplementedException;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.HTablePool;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.hfile.Compression;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.security.SecureRandom;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Random;

/**
* HAvroBase client.
*
* TODO: remove createType, use KeyGenerator in avrobase exclusively
*
* <p/>
* User: sam
* Date: Jun 8, 2010
* Time: 5:13:35 PM
*/
public class HAB<T extends SpecificRecord> extends AvroBaseImpl<T, byte[]> {

  // Avro table constants. Each qualifier is a single ASCII character; the
  // *_BYTE constants are that character's byte value, used to switch on the
  // qualifier's first byte in getRowResult().
  private final byte[] VERSION_COLUMN = $("v");
  private final byte   VERSION_COLUMN_BYTE = 118; // 'v'
  private final byte[] SCHEMA_COLUMN = $("s");
  private final byte   SCHEMA_COLUMN_BYTE = 115;  // 's'
  private final byte[] DATA_COLUMN = $("d");
  private final byte   DATA_COLUMN_BYTE = 100;    // 'd'
  private final byte[] FORMAT_COLUMN = $("f");
  private final byte   FORMAT_COLUMN_BYTE = 102;  // 'f'

  // The SEQUENTIAL counter lives under the empty row key; scan() skips it and
  // get() guards against it.
  private final byte[] SEQUENCE_ROW = new byte[0];
  private final byte[] SEQUENCE_COLUMN = $("i");

  // Schema Table constants
  private final byte[] AVRO_FAMILY = $("avro");

  // HBase plumbing and configuration; the two-way schema caches live in AvroBaseImpl
  private HTablePool pool;
  private HBaseAdmin admin;
  private byte[] tableName;
  private byte[] family;
  private byte[] schemaName;
  private CreateType createType;
  private Supplier<byte[]> keygen;
  protected static final TimestampGenerator TIMESTAMP_GENERATOR = new TimestampGenerator();

  public enum CreateType {
    CUSTOM,
    RANDOM,
    SEQUENTIAL,
    TIMESTAMP,
    REVERSE_TIMESTAMP
  }
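
  // Key shape per strategy: CUSTOM delegates to the injected Supplier; RANDOM,
  // TIMESTAMP and REVERSE_TIMESTAMP produce the 8 bytes of Bytes.toBytes(long);
  // SEQUENTIAL produces the reversed decimal digits of an incremented counter
  // (see getNextRow).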

  /**
   * Loads the schema map on init and keeps it up to date from then on. HBase
   * connectivity is usually provided via Guice.
   *
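   * <p>A minimal Guice wiring sketch (illustrative only; the module shape and
   * the concrete table names are assumptions, only the {@code @Named} keys come
   * from this constructor):
   * <pre>{@code
   * bind(new TypeLiteral<byte[]>() {})
   *     .annotatedWith(Names.named("table")).toInstance(Bytes.toBytes("profile"));
   * bind(new TypeLiteral<byte[]>() {})
   *     .annotatedWith(Names.named("family")).toInstance(Bytes.toBytes("avro"));
   * bind(new TypeLiteral<byte[]>() {})
   *     .annotatedWith(Names.named("schema")).toInstance(Bytes.toBytes("schema"));
   * }</pre>
   *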
   * @param expectedSchema the Avro schema instances are expected to conform to
   * @param pool           pool of HBase table handles
   * @param admin          HBase admin handle used to create tables and families
   * @param tableName      name of the table holding the rows
   * @param family         column family holding the Avro columns
   * @param schemaName     name of the table holding the schemas
   * @param format         Avro serialization format
   * @param createType     key-generation strategy used by create()
   * @param keygen         key supplier, required iff createType is CUSTOM
   * @throws AvroBaseException
   */
  @Inject
  public HAB(
      Schema expectedSchema,
      HTablePool pool,
      HBaseAdmin admin,
      @Named("table") byte[] tableName,
      @Named("family") byte[] family,
      @Named("schema") byte[] schemaName,
      AvroFormat format,
      CreateType createType,
      @Nullable Supplier<byte[]> keygen
  ) throws AvroBaseException {
    super(expectedSchema, format);
    this.pool = pool;
    this.admin = admin;
    this.tableName = tableName;
    this.family = family;
    this.schemaName = schemaName;
    this.createType = createType;
    this.keygen = keygen;

    if (createType == CreateType.CUSTOM && keygen == null) {
      throw new IllegalArgumentException("keygen must be non-null when createType is CUSTOM");
    }

    HTableInterface schemaTable;
    try {
      schemaTable = pool.getTable(this.schemaName);
    } catch (RuntimeException e) {
      if (e.getCause() instanceof TableNotFoundException) {
        schemaTable = createSchemaTable();
      } else {
        throw new AvroBaseException(e.getCause());
      }
    }
    try {
      loadSchemas(schemaTable);
    } catch (IOException e) {
      throw new AvroBaseException(e);
    } finally {
      pool.putTable(schemaTable);
    }
    HTableInterface table = getTable();
    try {
      if (table.getTableDescriptor().getFamily(family) == null) {
        HColumnDescriptor familyDesc = getColumnDesc(family);
        try {
          admin.disableTable(tableName);
          admin.addColumn(tableName, familyDesc);
          admin.enableTable(tableName);
        } catch (IOException e) {
          throw new AvroBaseException(e);
        }
      }
    } catch (IOException e) {
      throw new AvroBaseException(e);
    } finally {
      pool.putTable(table);
    }
  }

  // Load all the schemas currently registered in HBase

  private void loadSchemas(HTableInterface schemaTable) throws IOException {
    Scan scan = new Scan();
    scan.addColumn(AVRO_FAMILY, SCHEMA_COLUMN);
    ResultScanner scanner = schemaTable.getScanner(scan);
    try {
      for (Result result : scanner) {
        String row = $_(result.getRow());
        byte[] value = result.getValue(AVRO_FAMILY, SCHEMA_COLUMN);
        loadSchema(value, row);
      }
    } finally {
      scanner.close();
    }
  }

  // Create the schema table in HBase with a single-version, in-memory
  // 'avro' family.

  private HTableInterface createSchemaTable() throws AvroBaseException {
    HTableInterface schemaTable;
    HColumnDescriptor family = new HColumnDescriptor(AVRO_FAMILY);
    family.setMaxVersions(1);
    family.setCompressionType(Compression.Algorithm.LZO);
    family.setInMemory(true);
    HTableDescriptor tableDesc = new HTableDescriptor(schemaName);
    tableDesc.addFamily(family);
    try {
      admin.createTable(tableDesc);
    } catch (IOException e1) {
      throw new AvroBaseException(e1);
    }
    schemaTable = pool.getTable(schemaName);
    return schemaTable;
  }

  @Override
  public Row<T, byte[]> get(byte[] row) throws AvroBaseException {
    HTableInterface table = getTable();
    try {
      Result result = getHBaseRow(table, row, family);
      // TODO: works around a bug in HBASE 0.89 where a Get of the empty row
      // key can return a different row entirely
      if (row.length == 0 && !Bytes.equals(row, result.getRow())) {
        return null;
      }
      return getRowResult(result, row);
    } catch (IOException e) {
      throw new AvroBaseException(e);
    } finally {
      pool.putTable(table);
    }
  }

  private final Random random = new SecureRandom();

  @Override
  public byte[] create(T value) throws AvroBaseException {
    switch (createType) {
      case CUSTOM: {
        // loop until we don't get an ID collision
        byte[] row;
        do {
          row = keygen.get();
        } while (!put(row, value, 0));
        return row;
      }
      case RANDOM: {
        // loop until we don't get a random ID collision
        byte[] row;
        do {
          row = Bytes.toBytes(random.nextLong());
        } while (!put(row, value, 0));
        return row;
      }
      case SEQUENTIAL: {
        HTableInterface table = getTable();
        try {
          byte[] row;
          do {
            row = getNextRow(table, family);
          } while (!put(row, value, 0));
          return row;
        } catch (IOException e) {
          throw new AvroBaseException("Failed to increment column", e);
        } finally {
          pool.putTable(table);
        }
      }
      case TIMESTAMP:
      case REVERSE_TIMESTAMP: {
        // getTable() is called for its side effect of creating the table if
        // it does not yet exist; the handle itself is unused in this branch.
        HTableInterface table = getTable();
        try {
          byte[] row;
          do {
            long l = createType == CreateType.TIMESTAMP ?
                    TIMESTAMP_GENERATOR.getTimestamp() :
                    TIMESTAMP_GENERATOR.getInvertedTimestamp();
            row = Bytes.toBytes(l);
          } while (!put(row, value, 0));
          return row;
        } finally {
          pool.putTable(table);
        }
      }
    }
    return null; // required by javac; every CreateType case above returns
  }

  // Increments the shared sequence cell and reverses the decimal digits of
  // the result (e.g. sequence 1234 yields key "4321"), so that consecutive
  // ids don't all land on the same region.
  private byte[] getNextRow(HTableInterface table, final byte[] family) throws IOException {
    long l = table.incrementColumnValue(SEQUENCE_ROW, family, SEQUENCE_COLUMN, 1);
    byte[] row = $(String.valueOf(l));
    int length = row.length;
    for (int i = 0; i < length / 2; i++) {
      byte tmp = row[i];
      row[i] = row[length - i - 1];
      row[length - i - 1] = tmp;
    }
    return row;
  }

  @Override
  public void put(byte[] row, T value) throws AvroBaseException {
    HTableInterface table = getTable();
    long version;
    try {
      do {
        // FIXME: Spin until success, last one wins. Provably dangerous?
        version = getVersion(family, row, table);
      } while (!put(row, value, version));
    } catch (IOException e) {
      throw new AvroBaseException("Failed to retrieve version for row: " + $_(row), e);
    } finally {
      pool.putTable(table);
    }
  }

  @Override
  public boolean put(byte[] row, T value, long version) throws AvroBaseException {
    HTableInterface table = getTable();
    try {
      Schema schema = value.getSchema();
      String schemaKey = storeSchema(schema);
      byte[] bytes = serialize(value);
      Put put = new Put(row);
      put.add(family, SCHEMA_COLUMN, $(schemaKey));
      put.add(family, DATA_COLUMN, bytes);
      put.add(family, VERSION_COLUMN, Bytes.toBytes(version + 1));
      put.add(family, FORMAT_COLUMN, Bytes.toBytes(format.ordinal()));
      final byte[] expectedValue;
      if (version == 0) {
        expectedValue = new byte[0]; // TODO: should be null, but... HBASE-2920
      } else {
        expectedValue = Bytes.toBytes(version);
      }
      return table.checkAndPut(row, family, VERSION_COLUMN, expectedValue, put);
    } catch (IOException e) {
      throw new AvroBaseException("Could not encode " + value, e);
    } finally {
      pool.putTable(table);
    }
  }
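
  // Illustrative caller-side optimistic update loop (not part of the original
  // source; assumes Row exposes the public value/version fields it is
  // constructed with in getRowResult()):
  //
  //   Row<User, byte[]> r;                        // User is hypothetical
  //   do {
  //     r = hab.get(key);
  //     r.value.setName("new name");              // hypothetical mutator
  //   } while (!hab.put(key, r.value, r.version));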

  @Override
  public void delete(byte[] row) throws AvroBaseException {
    HTableInterface table = getTable();
    try {
      Delete delete = new Delete(row);
      delete.deleteFamily(family);
      table.delete(delete);
    } catch (IOException e) {
      throw new AvroBaseException("Failed to delete row", e);
    } finally {
      pool.putTable(table);
    }
  }

  @Override
  public Iterable<Row<T, byte[]>> scan(byte[] startRow, byte[] stopRow) throws AvroBaseException {
    Scan scan = new Scan();
    scan.addFamily(family);
    if (startRow != null) {
      scan.setStartRow(startRow);
    }
    if (stopRow != null) {
      scan.setStopRow(stopRow);
    }
    HTableInterface table = pool.getTable(tableName);
    try {
      ResultScanner scanner = table.getScanner(scan);
      final Iterator<Result> results = scanner.iterator();
      return new Iterable<Row<T, byte[]>>() {
        @Override
        public Iterator<Row<T, byte[]>> iterator() {
          return new Iterator<Row<T, byte[]>>() {
            Row<T, byte[]> r;

            @Override
            public boolean hasNext() {
              if (r != null) return true;
              while (results.hasNext()) {
                Result result = results.next();
                r = getRowResult(result, result.getRow());
                // Skip empty rows and the increment row
                if (r == null || r.row.length == 0) {
                  continue;
                }
                return true;
              }
              return false;
            }

            @Override
            public Row<T, byte[]> next() {
              if (hasNext()) {
                try {
                  return r;
                } finally {
                  r = null;
                }
              }
              throw new NoSuchElementException();
            }

            @Override
            public void remove() {
              throw new NotImplementedException();
            }
          };
        }
      };
    } catch (IOException e) {
      throw new AvroBaseException(e);
    } finally {
      // FIXME: Is this safe?
      pool.putTable(table);
    }
  }

  // Given an HBase row result, take it apart and populate the Row wrapper metadata.

  private Row<T, byte[]> getRowResult(Result result, byte[] row) throws AvroBaseException {
    // Defaults
    try {

      byte[] dataBytes = null;
      int dataOffset = -1;
      int dataLength = 0;

      long version = -1;
      Schema schema = null;
      AvroFormat format = AvroFormat.BINARY;

      KeyValue[] raw = result.raw();
      for (KeyValue kv : raw) {
        byte[] buffer = kv.getBuffer();
        int offset = kv.getValueOffset();
        int length = kv.getValueLength();
        switch(buffer[kv.getQualifierOffset()]) {
          case DATA_COLUMN_BYTE:
            dataBytes = buffer;
            dataOffset = offset;
            dataLength = length;
            break;
          case VERSION_COLUMN_BYTE:
            version = Bytes.toLong(buffer, offset, length);
            break;
          case FORMAT_COLUMN_BYTE:
            format = AvroFormat.values()[Bytes.toInt(buffer, offset, length)];
            break;
          case SCHEMA_COLUMN_BYTE:
            schema = loadSchema(row, buffer, offset, length);
            break;
        }
      }

      if (dataBytes != null) {
        // Deserialize the payload and wrap it with the row key and version
        return new Row<T, byte[]>(readValue(dataBytes, schema, format, dataOffset, dataLength), row, version);
      }
      return null;
    } catch (IOException e) {
      throw new AvroBaseException(e);
    }
  }

  // Pull the version out of the version column. A version of 0 means the row
  // does not yet exist in HBase.

  private long getVersion(byte[] columnFamily, byte[] row, HTableInterface table) throws IOException {
    Get get = new Get(row);
    get.addColumn(columnFamily, VERSION_COLUMN);
    Result result = table.get(get);
    byte[] versionB = result.getValue(columnFamily, VERSION_COLUMN);
    long version;
    if (versionB == null) {
      version = 0;
    } else {
      version = Bytes.toLong(versionB);
    }
    return version;
  }

  // Ensure that this schema is present within the configured schema table

  private String storeSchema(Schema schema) throws AvroBaseException {
    String schemaKey;
    synchronized (schema) {
      schemaKey = hashCache.get(schema);
      if (schemaKey == null) {
        // Hash the schema, store it
        String doc = schema.toString();
        schemaKey = createSchemaKey(schema, doc);
        Put put = new Put($(schemaKey));
        put.add(AVRO_FAMILY, SCHEMA_COLUMN, $(doc));
        HTableInterface schemaTable = pool.getTable(schemaName);
        try {
          schemaTable.put(put);
        } catch (IOException e) {
          throw new AvroBaseException("Could not store schema " + doc, e);
        } finally {
          pool.putTable(schemaTable);
        }
      }
    }
    return schemaKey;
  }

  // Pull an hbase row, ready to be wrapped by Row

  private Result getHBaseRow(HTableInterface table, byte[] row, byte[] columnFamily) throws IOException {
    Get get = new Get(row);
    get.addColumn(columnFamily, DATA_COLUMN);
    get.addColumn(columnFamily, SCHEMA_COLUMN);
    get.addColumn(columnFamily, VERSION_COLUMN);
    get.addColumn(columnFamily, FORMAT_COLUMN);
    return table.get(get);
  }

  // Resolve the schema referenced by this row, from the cache or the schema table

  private Schema loadSchema(final byte[] row, final byte[] schemaKey, int offset, int length) throws AvroBaseException, IOException {
    if (schemaKey == null) {
      throw new AvroBaseException("Schema not set for row: " + $_(row));
    }
    Schema schema = schemaCache.get($_(schemaKey, offset, length));
    if (schema == null) {
      HTableInterface schemaTable = pool.getTable(schemaName);
      try {
        Get schemaGet = new Get(schemaKey);
        schemaGet.addColumn(AVRO_FAMILY, SCHEMA_COLUMN);
        byte[] schemaBytes = schemaTable.get(schemaGet).getValue(AVRO_FAMILY, SCHEMA_COLUMN);
        if (schemaBytes == null) {
          throw new AvroBaseException("No schema " + $_(schemaKey) + " found in hbase for row " + $_(row));
        }
        schema = loadSchema(schemaBytes, $_(schemaKey));
      } finally {
        pool.putTable(schemaTable);
      }
    }
    return schema;
  }

  private String $_(byte[] schemaKey, int offset, int length) {
    return Bytes.toString(schemaKey, offset, length);
  }

  // Get or create the specified table with its column family

  private HTableInterface getTable() throws AvroBaseException {
    HTableInterface table;
    try {
      table = pool.getTable(tableName);
    } catch (RuntimeException e) {
      if (e.getCause() instanceof TableNotFoundException) {
        HColumnDescriptor familyDesc = getColumnDesc(family);
        HTableDescriptor tableDesc = new HTableDescriptor(tableName);
        tableDesc.addFamily(familyDesc);
        try {
          admin.createTable(tableDesc);
        } catch (IOException e1) {
          throw new AvroBaseException(e1);
        }
      } else {
        throw new AvroBaseException(e.getCause());
      }
      table = pool.getTable(tableName);
    }
    return table;
  }

  private HColumnDescriptor getColumnDesc(byte[] columnFamily) {
    HColumnDescriptor family = new HColumnDescriptor(columnFamily);
    family.setMaxVersions(1);
    family.setCompressionType(Compression.Algorithm.LZO);
    family.setInMemory(false);
    return family;
  }

  // Shorthand: encode a String as UTF-8 bytes.
  protected byte[] $(String string) {
    return Bytes.toBytes(string);
  }

  // Shorthand: decode UTF-8 bytes back into a String.
  protected String $_(byte[] bytes) {
    return Bytes.toString(bytes);
  }
}
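
Usage Example

The following sketch is illustrative and not part of the original source. It
assumes the HBase 0.89-era client API used above, a hypothetical Avro-generated
record class User, and arbitrary table names; only the HAB constructor
signature is taken from the code.

package havrobase;

import avrobase.AvroFormat;
import avrobase.Row;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTablePool;
import org.apache.hadoop.hbase.util.Bytes;

public class HABExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    HTablePool pool = new HTablePool(conf, 10);
    HBaseAdmin admin = new HBaseAdmin(conf);

    HAB<User> hab = new HAB<User>(
        User.SCHEMA$,                  // expected schema of the record class
        pool,
        admin,
        Bytes.toBytes("profile"),      // data table (assumed name)
        Bytes.toBytes("avro"),         // column family (assumed name)
        Bytes.toBytes("schema"),       // schema table (assumed name)
        AvroFormat.BINARY,
        HAB.CreateType.RANDOM,
        null);                         // keygen is only required for CUSTOM

    User user = new User();            // hypothetical SpecificRecord
    byte[] key = hab.create(user);     // loops until a random key is free
    Row<User, byte[]> row = hab.get(key);
    hab.put(key, row.value, row.version);
    hab.delete(key);
  }
}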