/**
* (c) Copyright 2014 WibiData, Inc.
*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kiji.schema.impl.cassandra;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;
import com.datastax.driver.core.PreparedStatement;
import com.datastax.driver.core.ResultSet;
import com.datastax.driver.core.Row;
import com.datastax.driver.core.exceptions.InvalidTypeException;
import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import org.apache.avro.AvroRuntimeException;
import org.apache.avro.Schema;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.curator.framework.CuratorFramework;
import org.apache.hadoop.hbase.HConstants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.kiji.annotations.ApiAudience;
import org.kiji.commons.ByteUtils;
import org.kiji.schema.KijiNotInstalledException;
import org.kiji.schema.KijiSchemaTable;
import org.kiji.schema.KijiURI;
import org.kiji.schema.avro.MD5Hash;
import org.kiji.schema.avro.SchemaTableBackup;
import org.kiji.schema.avro.SchemaTableEntry;
import org.kiji.schema.cassandra.CassandraTableName;
import org.kiji.schema.util.BytesKey;
import org.kiji.schema.util.DebugResourceTracker;
import org.kiji.schema.util.Lock;
import org.kiji.schema.zookeeper.ZooKeeperLock;
import org.kiji.schema.zookeeper.ZooKeeperUtils;
/**
* <p>
* Mapping between schema IDs, hashes and Avro schema objects.
* This class is thread-safe.
* </p>
*
* <p>
* Schemas are stored in two tables with a single column family named "schema" and that contains
* SchemaTableEntry records. One table is indexed by schema hashes (128-bit MD5 hashes of the
* schema JSON representation). Other table is indexed by schema IDs (integers >= 0).
* There is a third table with a counter for the Schema IDs.
*
* There may be multiple schema IDs for a single schema.
* </p>
*/
@ApiAudience.Private
public final class CassandraSchemaTable implements KijiSchemaTable {
private static final Logger LOG = LoggerFactory.getLogger(CassandraSchemaTable.class);
/** The column name in C* for the keys in the schema hash table. */
public static final String SCHEMA_COLUMN_HASH_KEY = "schema_hash";
/** The column name in C* for the keys in the schema ID table. */
public static final String SCHEMA_COLUMN_ID_KEY = "schema_id";
/** The column name in C* for the values in the schema hash and ID tables (same for both). */
public static final String SCHEMA_COLUMN_VALUE = "schema_blob";
/** The column name for the timestamp value in the schema hash and ID tables (same for both). */
public static final String SCHEMA_COLUMN_TIME = "time";
/** We need some kind of PRIMARY KEY column for the counter table. */
public static final String SCHEMA_COUNTER_COLUMN_KEY = "counter_key";
/** We should have only one row ever in this table... */
public static final String SCHEMA_COUNTER_ONLY_KEY_VALUE = "THE_ONLY_COUNTER";
/**
* The column name of the C* counter used to store schema IDs. In C*, counters go into their
* own tables.
*/
public static final String SCHEMA_COUNTER_COLUMN_VALUE = "counter";
/** Cassandra cluster connection. */
private final CassandraAdmin mAdmin;
/** C* table used to map schema hash to schema entries. */
private final CassandraTableName mSchemaHashTable;
/** C* table used to map schema IDs to schema entries. */
private final CassandraTableName mSchemaIdTable;
/** C* table used to increment schema IDs. */
private final CassandraTableName mCounterTable;
/** Connection to ZooKeeper. */
private final CuratorFramework mZKClient;
/** Lock for the kiji instance schema table. */
private final Lock mZKLock;
/** Maps schema MD5 hashes to schema entries. */
private final Map<BytesKey, SchemaEntry> mSchemaHashMap = new HashMap<BytesKey, SchemaEntry>();
/** Maps schema IDs to schema entries. */
private final Map<Long, SchemaEntry> mSchemaIdMap = new HashMap<Long, SchemaEntry>();
/** Schema hash cache. */
private final SchemaHashCache mHashCache = new SchemaHashCache();
/** KijiURI of the Kiji instance this schema table belongs to. */
private final KijiURI mInstanceURI;
/** States of a SchemaTable instance. */
private static enum State {
UNINITIALIZED,
OPEN,
CLOSED
}
/** Tracks the state of this SchemaTable instance. */
private AtomicReference<State> mState = new AtomicReference<State>(State.UNINITIALIZED);
/** Avro decoder factory. */
private static final DecoderFactory DECODER_FACTORY = DecoderFactory.get();
/** Avro encoder factory. */
private static final EncoderFactory ENCODER_FACTORY = EncoderFactory.get();
/** Avro reader for a schema entry. */
private static final DatumReader<SchemaTableEntry> SCHEMA_ENTRY_READER =
new SpecificDatumReader<SchemaTableEntry>(SchemaTableEntry.SCHEMA$);
/** Avro writer for a schema entry. */
private static final DatumWriter<SchemaTableEntry> SCHEMA_ENTRY_WRITER =
new SpecificDatumWriter<SchemaTableEntry>(SchemaTableEntry.SCHEMA$);
/** Prepared statement for reading from the hash table. */
private PreparedStatement mPreparedStatementReadHashTable = null;
/** Prepared statement for writing to the hash table. */
private PreparedStatement mPreparedStatementWriteHashTable = null;
/** Prepared statement for writing to the ID table. */
private PreparedStatement mPreparedStatementWriteIdTable = null;
/** {@inheritDoc} */
@Override
public BytesKey getSchemaHash(Schema schema) {
return mHashCache.getHash(schema);
}
/**
* Prepare the statement for writing to the hash table.
*
* @throws java.io.IOException if there is a problem preparing the statement.
*/
private void prepareQueryWriteHashTable() throws IOException {
String hashQueryText = String.format(
"INSERT INTO %s (%s, %s, %s) VALUES(?, ?, ?);",
mSchemaHashTable,
SCHEMA_COLUMN_HASH_KEY,
SCHEMA_COLUMN_TIME,
SCHEMA_COLUMN_VALUE);
mPreparedStatementWriteHashTable = mAdmin.getPreparedStatement(hashQueryText);
}
/**
* Prepare the statement for writing to the ID table.
*
* @throws java.io.IOException if there is a problem preparing the statement.
*/
private void prepareQueryWriteIdTable() throws IOException {
String idQueryText = String.format(
"INSERT INTO %s (%s, %s, %s) VALUES(?, ?, ?);",
mSchemaIdTable,
SCHEMA_COLUMN_ID_KEY,
SCHEMA_COLUMN_TIME,
SCHEMA_COLUMN_VALUE);
mPreparedStatementWriteIdTable = mAdmin.getPreparedStatement(idQueryText);
}
/**
* Prepare the statement for reading from the hash table.
*
* @throws java.io.IOException if there is a problem preparing the statement.
*/
private void prepareQueryReadHashTable() throws IOException {
String queryText = String.format(
"SELECT %s FROM %s WHERE %s=? ORDER BY %s DESC LIMIT 1",
SCHEMA_COLUMN_VALUE,
mSchemaHashTable,
SCHEMA_COLUMN_HASH_KEY,
SCHEMA_COLUMN_TIME);
mPreparedStatementReadHashTable = mAdmin.getPreparedStatement(queryText);
}
/**
* Decodes a binary-encoded Avro schema entry.
*
* @param bytes Binary-encoded Avro schema entry.
* @return Decoded Avro schema entry.
* @throws java.io.IOException on I/O error.
*/
public static SchemaTableEntry decodeSchemaEntry(final byte[] bytes) throws IOException {
final SchemaTableEntry entry = new SchemaTableEntry();
final Decoder decoder =
DECODER_FACTORY.directBinaryDecoder(new ByteArrayInputStream(bytes), null);
return SCHEMA_ENTRY_READER.read(entry, decoder);
}
/**
* Encodes an Avro schema entry into binary.
*
* @param avroEntry Avro schema entry to encode.
* @return Binary-encoded Avro schema entry.
* @throws java.io.IOException on I/O error.
*/
public static byte[] encodeSchemaEntry(final SchemaTableEntry avroEntry) throws IOException {
final ByteArrayOutputStream bytes = new ByteArrayOutputStream(4096);
final Encoder encoder = ENCODER_FACTORY.directBinaryEncoder(bytes, null);
SCHEMA_ENTRY_WRITER.write(avroEntry, encoder);
return bytes.toByteArray();
}
/**
* Wrap an existing HBase table assumed to be where the schema data is stored.
*
* @param admin Cassandra connection.
* @param instanceURI URI of the Kiji instance this schema table belongs to.
* @throws java.io.IOException on I/O error.
*/
public CassandraSchemaTable(CassandraAdmin admin, KijiURI instanceURI)
throws IOException {
mAdmin = Preconditions.checkNotNull(admin);
mInstanceURI = instanceURI;
mSchemaHashTable = CassandraTableName.getSchemaHashTableName(instanceURI);
mSchemaIdTable = CassandraTableName.getSchemaIdTableName(instanceURI);
mCounterTable = CassandraTableName.getSchemaCounterTableName(instanceURI);
if (!mAdmin.tableExists(mSchemaHashTable)) {
throw new KijiNotInstalledException("Schema hash table not installed.", instanceURI);
}
if (!mAdmin.tableExists(mSchemaIdTable)) {
throw new KijiNotInstalledException("Schema ID table not installed.", instanceURI);
}
if (!mAdmin.tableExists(mCounterTable)) {
throw new KijiNotInstalledException("Schema counter table not installed.", instanceURI);
}
mZKClient = ZooKeeperUtils.getZooKeeperClient(instanceURI.getZooKeeperEnsemble());
mZKLock = new ZooKeeperLock(mZKClient, ZooKeeperUtils.getSchemaTableLock(mInstanceURI));
final State oldState = mState.getAndSet(State.OPEN);
Preconditions.checkState(oldState == State.UNINITIALIZED,
"Cannot open SchemaTable instance in state %s.", oldState);
DebugResourceTracker.get().registerResource(this);
// Prepare queries that we'll use multiple times
prepareQueryWriteHashTable();
prepareQueryWriteIdTable();
prepareQueryReadHashTable();
}
/**
* Looks up a schema entry given an Avro schema object.
*
* Looks first in-memory. If the schema is not known in-memory, looks in the Cassandra tables.
*
* @param schema Avro schema to look up.
* @return Either the pre-existing entry for the specified schema, or a newly created entry.
* @throws java.io.IOException on I/O error.
*/
private synchronized SchemaEntry getOrCreateSchemaEntry(final Schema schema) throws IOException {
final State state = mState.get();
Preconditions.checkState(state == State.OPEN,
"Cannot get or create schema entry from SchemaTable instance in state %s.", state);
final BytesKey schemaHash = getSchemaHash(schema);
final SchemaEntry knownEntry = getSchemaEntry(schemaHash);
if (knownEntry != null) {
return knownEntry;
}
// Schema is unknown, both in-memory and in-table.
// Allocate a new schema ID and write it down to the tables:
return storeInMemory(registerNewSchemaInTable(schema, schemaHash));
}
/** {@inheritDoc} */
@Override
public long getOrCreateSchemaId(final Schema schema) throws IOException {
return getOrCreateSchemaEntry(schema).getId();
}
/** {@inheritDoc} */
@Override
public BytesKey getOrCreateSchemaHash(final Schema schema) throws IOException {
return getOrCreateSchemaEntry(schema).getHash();
}
/**
* Registers a new schema into the schema tables.
*
* The following things happen atomically, while holding a lock on the counter row:
* <li> look up the schema from the hash table, returning the entry if it is found; </li>
* <li> allocate a new unique ID for the schema (by incrementing the schema counter); </li>
* <li> write the new schema entry to the hash table and the ID table. </li>
*
* @param schema Avro schema to register
* @param schemaHash hash of the schema
* @return Fully populated SchemaEntry
* @throws java.io.IOException on I/O error.
*/
private SchemaEntry registerNewSchemaInTable(final Schema schema, final BytesKey schemaHash)
throws IOException {
mZKLock.lock();
try {
final SchemaTableEntry existingAvroEntry = loadFromHashTable(schemaHash);
if (existingAvroEntry != null) {
return fromAvroEntry(existingAvroEntry);
}
// Here we know the schema is unknown from the schema tables and no other process can
// update the schema table.
incrementSchemaIdCounter(1);
long schemaId = readSchemaIdCounter() - 1;
final SchemaEntry entry = new SchemaEntry(schemaId, schemaHash, schema);
storeInTable(toAvroEntry(entry));
return entry;
} finally {
mZKLock.unlock();
}
}
/**
* Increment the schema ID counter.
* @param incrementAmount Amount by which to increment the counter (can be negative).
*/
private void incrementSchemaIdCounter(long incrementAmount) {
CassandraTableName tableName = mCounterTable;
String incrementSign = incrementAmount >= 0 ? "+" : "-";
String queryText = String.format("UPDATE %s SET %s = %s %s %d WHERE %s='%s';",
tableName,
SCHEMA_COUNTER_COLUMN_VALUE,
SCHEMA_COUNTER_COLUMN_VALUE,
incrementSign,
incrementAmount,
SCHEMA_COUNTER_COLUMN_KEY,
SCHEMA_COUNTER_ONLY_KEY_VALUE);
mAdmin.execute(queryText);
}
/**
* Read back the current value of the schema ID counter.
* @return Value of the counter.
*/
private long readSchemaIdCounter() {
// Sanity check that counter value is 1!
String queryText = String.format("SELECT * FROM %s;", mCounterTable);
ResultSet resultSet = mAdmin.execute(queryText);
List<Row> rows = resultSet.all();
assert(rows.size() == 1);
Row row = rows.get(0);
return row.getLong(SCHEMA_COUNTER_COLUMN_VALUE);
}
/**
* Used for resetting the schema ID counter.
*
* This is fairly hackish and relies upon the counter being locked with a ZooKeeper lock.
* @param newCounterValue Value to which to set the counter.
*/
private void setSchemaIdCounter(long newCounterValue) {
// Get the current counter value
long currentValue = readSchemaIdCounter();
incrementSchemaIdCounter(newCounterValue - currentValue);
}
/**
* Writes the given schema entry to the ID and hash tables.
*
* This is not protected from concurrent writes. Caller must ensure consistency.
*
* @param avroEntry Schema entry to write.
* @throws java.io.IOException on I/O error.
*/
private void storeInTable(final SchemaTableEntry avroEntry)
throws IOException {
storeInTable(avroEntry, HConstants.LATEST_TIMESTAMP, true);
}
/**
* Writes the given schema entry to the ID and hash tables.
*
* This is not protected from concurrent writes. Caller must ensure consistency.
*
* @param avroEntry Schema entry to write.
* @param timestamp Write entries with this timestamp.
* @param flush Whether to flush tables synchronously.
* @throws java.io.IOException on I/O error.
*/
private void storeInTable(final SchemaTableEntry avroEntry, long timestamp, boolean flush)
throws IOException {
final byte[] entryBytes = encodeSchemaEntry(avroEntry);
// TODO: Obviate this comment by doing all of this in batch.
// Writes the ID mapping first: if the hash table write fails, we just lost one schema ID.
// The hash table write must not happen before the ID table write has been persisted.
// Otherwise, another client may see the hash entry, write cells with the schema ID that cannot
// be decoded (since the ID mapping has not been written yet).
final ResultSet resultSet = mAdmin.execute(
mPreparedStatementWriteIdTable.bind(
avroEntry.getId(),
new Date(timestamp),
ByteBuffer.wrap(entryBytes))
);
Preconditions.checkNotNull(resultSet);
// TODO: Anything here to flush the table or verify that this worked?
//if (flush) { mSchemaIdTable.flushCommits(); }
final ResultSet hashResultSet =
mAdmin.execute(
mPreparedStatementWriteHashTable.bind(
ByteBuffer.wrap(avroEntry.getHash().bytes()),
new Date(timestamp),
ByteBuffer.wrap(entryBytes))
);
Preconditions.checkNotNull(hashResultSet);
// TODO: Anything here to flush the table or verify that this worked?
//if (flush) { mSchemaHashTable.flushCommits(); }
}
/**
* Fetches a schema entry from the tables given a schema ID.
*
* @param schemaId schema ID
* @return Avro schema entry, or null if the schema ID does not exist in the table
* @throws java.io.IOException on I/O error.
*/
private SchemaTableEntry loadFromIdTable(long schemaId) throws IOException {
CassandraTableName tableName = mSchemaIdTable;
// TODO: Prepare this statement once in constructor, not every load.
final String queryText = String.format(
"SELECT %s FROM %s WHERE %s=%d ORDER BY %s DESC LIMIT 1",
SCHEMA_COLUMN_VALUE,
tableName,
SCHEMA_COLUMN_ID_KEY,
schemaId,
SCHEMA_COLUMN_TIME);
final ResultSet resultSet = mAdmin.execute(queryText);
final List<Row> rows = resultSet.all();
if (0 == rows.size()) {
return null;
}
assert(rows.size() == 1);
final byte[] schemaAsBytes =
ByteUtils.toBytes(rows.get(0).getBytes(SCHEMA_COLUMN_VALUE));
return decodeSchemaEntry(schemaAsBytes);
}
/**
* Fetches a schema entry from the tables given a schema hash.
*
* @param schemaHash schema hash
* @return Avro schema entry, or null if the schema hash does not exist in the table
* @throws java.io.IOException on I/O error.
*/
private SchemaTableEntry loadFromHashTable(BytesKey schemaHash) throws IOException {
final ByteBuffer tableKey = ByteBuffer.wrap(schemaHash.getBytes());
final ResultSet resultSet = mAdmin.execute(mPreparedStatementReadHashTable.bind(tableKey));
final List<Row> rows = resultSet.all();
if (0 == rows.size()) {
return null;
}
assert(rows.size() == 1);
final byte[] schemaAsBytes =
ByteUtils.toBytes(rows.get(0).getBytes(SCHEMA_COLUMN_VALUE));
return decodeSchemaEntry(schemaAsBytes);
}
/**
* Converts an Avro SchemaTableEntry into a SchemaEntry.
*
* @param avroEntry Avro SchemaTableEntry
* @return an equivalent SchemaEntry
*/
public static SchemaEntry fromAvroEntry(final SchemaTableEntry avroEntry) {
final String schemaJson = avroEntry.getAvroSchema();
final Schema schema = new Schema.Parser().parse(schemaJson);
return new SchemaEntry(avroEntry.getId(), new BytesKey(avroEntry.getHash().bytes()), schema);
}
/**
* Converts a SchemaEntry into an Avro SchemaTableEntry.
*
* @param entry a SchemaEntry.
* @return an equivalent Avro SchemaTableEntry.
*/
public static SchemaTableEntry toAvroEntry(final SchemaEntry entry) {
return SchemaTableEntry
.newBuilder()
.setId(entry.getId())
.setHash(new MD5Hash(entry.getHash().getBytes()))
.setAvroSchema(entry.getSchema().toString())
.build();
}
/** {@inheritDoc} */
@Override
public synchronized Schema getSchema(long schemaId) throws IOException {
final SchemaEntry entry = getSchemaEntry(schemaId);
return (entry == null) ? null : entry.getSchema();
}
/** {@inheritDoc} */
@Override
public synchronized SchemaEntry getSchemaEntry(long schemaId) throws IOException {
final State state = mState.get();
Preconditions.checkState(state == State.OPEN,
"Cannot get schema entry from SchemaTable instance in state %s.", state);
final SchemaEntry existingEntry = mSchemaIdMap.get(schemaId);
if (existingEntry != null) {
return existingEntry;
}
// On a lookup miss from the local schema cache, check to see if we can get the schema
// from the original HBase table, cache it locally, and return it.
final SchemaTableEntry avroEntry = loadFromIdTable(schemaId);
if (avroEntry == null) {
return null;
}
return storeInMemory(avroEntry);
}
/** {@inheritDoc} */
@Override
public Schema getSchema(BytesKey schemaHash) throws IOException {
final SchemaEntry entry = getSchemaEntry(schemaHash);
return (entry == null) ? null : entry.getSchema();
}
/** {@inheritDoc} */
@Override
public synchronized SchemaEntry getSchemaEntry(BytesKey schemaHash) throws IOException {
final State state = mState.get();
Preconditions.checkState(state == State.OPEN,
"Cannot get schema entry from SchemaTable instance in state %s.", state);
final SchemaEntry existingEntry = mSchemaHashMap.get(schemaHash);
if (existingEntry != null) {
return existingEntry;
}
// On a lookup miss from the local schema cache, check to see if we can get the schema
// from the original HBase table, cache it locally, and return it.
final SchemaTableEntry avroEntry = loadFromHashTable(schemaHash);
if (null == avroEntry) {
return null;
}
final SchemaEntry entry = storeInMemory(avroEntry);
Preconditions.checkState(schemaHash.equals(entry.getHash()));
return entry;
}
/** {@inheritDoc} */
@Override
public SchemaEntry getSchemaEntry(Schema schema) throws IOException {
return getSchemaEntry(getSchemaHash(schema));
}
/**
* Stores the specified schema entry in memory.
*
* External synchronization required.
*
* @param avroEntry Avro schema entry.
* @return the SchemaEntry stored in memory.
*/
private SchemaEntry storeInMemory(final SchemaTableEntry avroEntry) {
return storeInMemory(fromAvroEntry(avroEntry));
}
/**
* Stores the specified schema entry in memory.
*
* External synchronization required.
*
* @param entry the SchemaEntry to store in memory.
* @return the SchemaEntry stored in memory.
*/
private SchemaEntry storeInMemory(final SchemaEntry entry) {
// Replacing an hash-mapped entry may happen, if two different IDs were assigned to one schema.
final SchemaEntry oldHashEntry = mSchemaHashMap.put(entry.getHash(), entry);
if (oldHashEntry != null) {
LOG.info(String.format(
"Replacing hash-mapped schema entry:%n%s%nwith:%n%s", oldHashEntry, entry));
}
// Replacing an ID-mapped entry should never happen:
// IDs are associated to at most one schema/hash.
final SchemaEntry oldIdEntry = mSchemaIdMap.put(entry.getId(), entry);
if (oldIdEntry != null) {
throw new AssertionError(String.format(
"Attempting to replace ID-mapped schema entry:%n%s%nwith:%n%s", oldIdEntry, entry));
}
return entry;
}
/** {@inheritDoc} */
@Override
public synchronized void flush() throws IOException {
final State state = mState.get();
Preconditions.checkState(state == State.OPEN,
"Cannot flush SchemaTable instance in state %s.", state);
// TODO: Replace with actual C* code
//mSchemaIdTable.flushCommits();
//mSchemaHashTable.flushCommits();
}
/** {@inheritDoc} */
@Override
public synchronized void close() throws IOException {
flush();
final State oldState = mState.getAndSet(State.CLOSED);
Preconditions.checkState(oldState == State.OPEN,
"Cannot close SchemaTable instance in state %s.", oldState);
DebugResourceTracker.get().unregisterResource(this);
mZKLock.close();
mZKClient.close();
}
/**
* Install the schema hash table.
*
* @param admin for the Kiji instance.
* @param tableName name of the schema hash table to create.
*/
private static void installHashTable(CassandraAdmin admin, CassandraTableName tableName) {
// Let's try to make this somewhat readable...
// TODO: Table should order by DESC for time
final String tableDescription = String.format(
"CREATE TABLE %s (%s blob, %s timestamp, %s blob, PRIMARY KEY (%s, %s));",
tableName,
SCHEMA_COLUMN_HASH_KEY,
SCHEMA_COLUMN_TIME,
SCHEMA_COLUMN_VALUE,
SCHEMA_COLUMN_HASH_KEY,
SCHEMA_COLUMN_TIME);
admin.createTable(tableName, tableDescription);
}
/**
* Install the schema ID table.
*
* @param admin for the Kiji instance.
* @param tableName name of the schema ID table to create.
*/
private static void installIdTable(CassandraAdmin admin, CassandraTableName tableName) {
// TODO: Table should order by DESC for time
final String tableDescription = String.format(
"CREATE TABLE %s (%s bigint, %s timestamp, %s blob, PRIMARY KEY (%s, %s));",
tableName,
SCHEMA_COLUMN_ID_KEY,
SCHEMA_COLUMN_TIME,
SCHEMA_COLUMN_VALUE,
SCHEMA_COLUMN_ID_KEY,
SCHEMA_COLUMN_TIME);
admin.createTable(tableName, tableDescription);
}
/**
* Install the schema ID counter table.
*
* @param admin for the Kiji instance.
* @param tableName name of the schema ID counter table to create.
* @throws java.io.IOException if there is a problem creating the Cassandra table.
*/
private static void installCounterTable(
CassandraAdmin admin,
CassandraTableName tableName) throws IOException {
final String tableDescription = String.format(
"CREATE TABLE %s (%s text PRIMARY KEY, %s counter);",
tableName,
SCHEMA_COUNTER_COLUMN_KEY,
SCHEMA_COUNTER_COLUMN_VALUE);
admin.createTable(tableName, tableDescription);
// Now set the counter to zero
final String updateQuery = String.format("UPDATE %s SET %s = %s + 0 WHERE %s='%s';",
tableName,
SCHEMA_COUNTER_COLUMN_VALUE,
SCHEMA_COUNTER_COLUMN_VALUE,
SCHEMA_COUNTER_COLUMN_KEY,
SCHEMA_COUNTER_ONLY_KEY_VALUE);
LOG.debug("Update query: {}.", updateQuery);
admin.execute(updateQuery);
// TODO: check if below is necessary, or leftover
// Sanity check that counter value is 1!
final String selectQuery = String.format("SELECT * FROM %s;", tableName);
final ResultSet resultSet = admin.execute(selectQuery);
final List<Row> rows = resultSet.all();
assert(rows.size() == 1);
final Row row = rows.get(0);
final long counterValue = row.getLong(SCHEMA_COUNTER_COLUMN_VALUE);
assert(0 == counterValue);
}
/**
* Install the schema table into a Kiji instance.
*
* @param admin The C* Admin interface for the HBase cluster to install into.
* @param kijiURI the KijiURI.
* @throws java.io.IOException on I/O error.
*/
public static void install(CassandraAdmin admin, KijiURI kijiURI) throws IOException {
// Keep all versions of schema entries:
// - entries of the ID table should never be written more than once.
// - entries of the hash table could be written more than once:
// - with different schema IDs in some rare cases, for example when a client crashes
// while writing an entry.
// - with different schemas on MD5 hash collisions.
installHashTable(admin, CassandraTableName.getSchemaHashTableName(kijiURI));
installIdTable(admin, CassandraTableName.getSchemaIdTableName(kijiURI));
installCounterTable(admin, CassandraTableName.getSchemaCounterTableName(kijiURI));
final CassandraSchemaTable schemaTable = new CassandraSchemaTable(admin, kijiURI);
try {
schemaTable.registerPrimitiveSchemas();
} finally {
schemaTable.close();
}
}
/**
* Deletes a C* table.
*
* @param admin C* admin client.
* @param tableName Name of the table to delete.
*/
private static void deleteTable(CassandraAdmin admin, CassandraTableName tableName) {
final String delete = CQLUtils.getDropTableStatement(tableName);
admin.execute(delete);
}
/**
* Disables and removes the schema table from HBase.
*
* @param admin The HBase Admin object.
* @param kijiURI The KijiURI for the instance to remove.
* @throws java.io.IOException If there is an error.
*/
public static void uninstall(CassandraAdmin admin, KijiURI kijiURI)
throws IOException {
final CassandraTableName hashTableName = CassandraTableName.getSchemaHashTableName(kijiURI);
deleteTable(admin, hashTableName);
final CassandraTableName idTableName = CassandraTableName.getSchemaIdTableName(kijiURI);
deleteTable(admin, idTableName);
final CassandraTableName counterTableName =
CassandraTableName.getSchemaCounterTableName(kijiURI);
deleteTable(admin, counterTableName);
}
/** {@inheritDoc} */
@Override
public SchemaTableBackup toBackup() throws IOException {
final State state = mState.get();
Preconditions.checkState(state == State.OPEN,
"Cannot backup SchemaTable instance in state %s.", state);
mZKLock.lock();
List<SchemaTableEntry> entries = Lists.newArrayList();
try {
/** Entries from the schema hash table. */
final Set<SchemaEntry> hashTableEntries = loadSchemaHashTable();
if (!checkConsistency(hashTableEntries)) {
LOG.error("Schema hash table is inconsistent");
}
/** Entries from the schema ID table. */
final Set<SchemaEntry> idTableEntries = loadSchemaIdTable();
if (!checkConsistency(idTableEntries)) {
LOG.error("Schema hash table is inconsistent");
}
final Set<SchemaEntry> mergedEntries = new HashSet<SchemaEntry>(hashTableEntries);
mergedEntries.addAll(idTableEntries);
if (!checkConsistency(mergedEntries)) {
LOG.error("Merged schema hash and ID tables are inconsistent");
}
for (SchemaEntry entry : mergedEntries) {
entries.add(toAvroEntry(entry));
}
} finally {
mZKLock.unlock();
}
return SchemaTableBackup.newBuilder().setEntries(entries).build();
}
/** {@inheritDoc} */
@Override
public void fromBackup(final SchemaTableBackup backup) throws IOException {
final State state = mState.get();
Preconditions.checkState(state == State.OPEN,
"Cannot restore backup to SchemaTable instance in state %s.", state);
mZKLock.lock();
try {
/** Entries from the schema hash table. */
final Set<SchemaEntry> hashTableEntries = loadSchemaHashTable();
/** Entries from the schema ID table. */
final Set<SchemaEntry> idTableEntries = loadSchemaIdTable();
final Set<SchemaEntry> mergedEntries = new HashSet<SchemaEntry>(hashTableEntries);
mergedEntries.addAll(idTableEntries);
if (!checkConsistency(mergedEntries)) {
LOG.error("Merged schema hash and ID tables are inconsistent");
}
final List<SchemaTableEntry> avroBackupEntries = backup.getEntries();
final Set<SchemaEntry> schemaTableEntries =
new HashSet<SchemaEntry>(avroBackupEntries.size());
for (SchemaTableEntry avroEntry : avroBackupEntries) {
schemaTableEntries.add(fromAvroEntry(avroEntry));
}
if (!checkConsistency(schemaTableEntries)) {
LOG.error("Backup schema entries are inconsistent");
}
mergedEntries.addAll(schemaTableEntries);
if (!checkConsistency(schemaTableEntries)) {
LOG.error("Backup schema entries are inconsistent with already existing schema entries");
}
long maxSchemaId = -1L;
for (SchemaEntry entry : mergedEntries) {
maxSchemaId = Math.max(maxSchemaId, entry.getId());
}
final long nextSchemaId = maxSchemaId + 1;
flush();
// Restored schema entries share the same timestamp:
final long timestamp = System.currentTimeMillis();
for (SchemaEntry entry : schemaTableEntries) {
storeInTable(toAvroEntry(entry), timestamp, false); // do not flush
}
setSchemaIdCounter(nextSchemaId);
flush();
} finally {
mZKLock.unlock();
}
}
/**
* Checks the consistency of a collection of schema entries.
*
* @param entries Collection of schema entries.
* @return whether the entries are consistent.
*/
private static boolean checkConsistency(Set<SchemaEntry> entries) {
final Map<Long, SchemaEntry> idMap = new HashMap<Long, SchemaEntry>(entries.size());
final Map<BytesKey, SchemaEntry> hashMap = new HashMap<BytesKey, SchemaEntry>(entries.size());
boolean isConsistent = true;
for (SchemaEntry entry : entries) {
final SchemaEntry existingEntryWithId = idMap.put(entry.getId(), entry);
if ((existingEntryWithId != null) && !existingEntryWithId.equals(entry)) {
LOG.error(String.format("Conflicting schema entries with ID %d: %s vs %s",
entry.getId(), entry, existingEntryWithId));
isConsistent = false;
}
final SchemaEntry existingEntryWithHash = hashMap.put(entry.getHash(), entry);
if ((existingEntryWithHash != null) && !existingEntryWithHash.equals(entry)) {
if (existingEntryWithHash.getHash().equals(entry.getHash())
&& existingEntryWithHash.getSchema().equals(entry.getSchema())) {
// Does not affect consistency:
LOG.info(String.format("Schema with hash %s has multiple IDs: %d, %d: %s",
entry.getHash(), entry.getId(), existingEntryWithHash.getId(), entry.getSchema()));
} else {
LOG.info(String.format("Conflicting schema entries with hash %s: %s vs %s",
entry.getHash(), entry, existingEntryWithHash));
isConsistent = false;
}
}
}
return isConsistent;
}
/** Primitive types pre-allocated in all schema tables. */
enum PreRegisteredSchema {
STRING(Schema.Type.STRING), // ID 0
BYTES(Schema.Type.BYTES), // ID 1
INT(Schema.Type.INT), // ID 2
LONG(Schema.Type.LONG), // ID 3
FLOAT(Schema.Type.FLOAT), // ID 4
DOUBLE(Schema.Type.DOUBLE), // ID 5
BOOLEAN(Schema.Type.BOOLEAN), // ID 6
NULL(Schema.Type.NULL); // ID 7
/**
* Initializes a pre-registered schema descriptor.
*
* @param type Avro schema type.
*/
PreRegisteredSchema(Schema.Type type) {
mType = type;
mId = ordinal();
}
/** @return the Avro schema type. */
public Schema.Type getType() {
return mType;
}
/** @return the unique ID of the pre-allocated schema. */
public int getSchemaId() {
// By default, we use the enum ordinal
return mId;
}
private final int mId;
private final Schema.Type mType;
}
/** Number of pre-allocated schemas. */
public static final int PRE_REGISTERED_SCHEMA_COUNT = PreRegisteredSchema.values().length; // = 8
/**
* Pre-registers all the primitive data types.
*
* @throws java.io.IOException on I/O failure.
*/
private synchronized void registerPrimitiveSchemas() throws IOException {
int expectedSchemaId = 0;
LOG.debug("Pre-registering primitive schema types.");
for (PreRegisteredSchema desc : PreRegisteredSchema.values()) {
final Schema schema = Schema.create(desc.getType());
Preconditions.checkState(getOrCreateSchemaId(schema) == expectedSchemaId);
Preconditions.checkState(desc.getSchemaId() == expectedSchemaId);
expectedSchemaId += 1;
}
Preconditions.checkState(expectedSchemaId == PRE_REGISTERED_SCHEMA_COUNT);
}
/**
* Loads and check the consistency of the schema hash table.
*
* @return the set of schema entries from the schema hash table.
* @throws java.io.IOException on I/O error.
*/
private Set<SchemaEntry> loadSchemaHashTable() throws IOException {
LOG.info("Loading entries from schema hash table.");
final Set<SchemaEntry> entries = new HashSet<SchemaEntry>();
int hashTableRowCounter = 0;
// Fetch all of the schemas from the schema hash table (all versions)
final String queryText = String.format("SELECT * FROM %s;", mSchemaHashTable);
final ResultSet resultSet = mAdmin.execute(queryText);
for (Row row : resultSet) {
hashTableRowCounter += 1;
// TODO: Not sure how to replicate this check in C*...
/*
if (result.getRow().length != Hasher.HASH_SIZE_BYTES) {
LOG.error(String.format(
"Invalid schema hash table row key size: %s, expecting %d bytes.",
new BytesKey(result.getRow()), Hasher.HASH_SIZE_BYTES));
continue;
*/
// Get the row key, timestamp, and schema for this row
final BytesKey rowKey = new BytesKey(ByteUtils.toBytes(row.getBytes(SCHEMA_COLUMN_HASH_KEY)));
final long timestamp = row.getDate(SCHEMA_COLUMN_TIME).getTime();
final byte[] schemaAsBytes = ByteUtils.toBytes(row.getBytes(SCHEMA_COLUMN_VALUE));
try {
final SchemaEntry entry = fromAvroEntry(decodeSchemaEntry(schemaAsBytes));
entries.add(entry);
if (!getSchemaHash(entry.getSchema()).equals(entry.getHash())) {
LOG.error(
"Invalid schema hash table entry: computed schema hash {} does not match entry {}.",
getSchemaHash(entry.getSchema()), entry);
}
if (!rowKey.equals(entry.getHash())) {
LOG.error(
"Inconsistent schema hash table: hash encoded in row key {}"
+ " does not match schema entry: {}.",
rowKey, entry
);
}
} catch (IOException ioe) {
LOG.error("Unable to decode schema hash table entry for row {}, timestamp {}: {}.",
rowKey, timestamp, ioe);
} catch (AvroRuntimeException are) {
LOG.error(
"Unable to decode schema hash table entry for row {}, timestamp {}: {}.",
rowKey, timestamp, are);
}
}
LOG.info("Schema hash table has {} rows and {} entries.", hashTableRowCounter, entries.size());
return entries;
}
/**
* Loads and check the consistency of the schema ID table.
*
* @return the set of schema entries from the schema ID table.
* @throws java.io.IOException on I/O error.
*/
private Set<SchemaEntry> loadSchemaIdTable() throws IOException {
LOG.info("Loading entries from schema ID table.");
int idTableRowCounter = 0;
final Set<SchemaEntry> entries = new HashSet<SchemaEntry>();
// Fetch all of the schemas from the schema ID table (all versions)
final String queryText = String.format("SELECT * FROM %s;", mSchemaIdTable);
final ResultSet resultSet = mAdmin.execute(queryText);
for (Row row : resultSet) {
idTableRowCounter += 1;
// Get the row key, timestamp, and schema for this row. Use "Unsafe" version of method here
// to get raw bytes no matter what format the field is in the C* table.
final BytesKey rowKey = new BytesKey(
ByteUtils.toBytes(row.getBytesUnsafe(SCHEMA_COLUMN_ID_KEY)));
long schemaId = -1;
try {
schemaId = row.getLong(SCHEMA_COLUMN_ID_KEY);
} catch (InvalidTypeException exn) {
LOG.error(String.format("Unable to decode schema ID encoded in row key %s: %s",
rowKey, exn));
}
final long timestamp = row.getDate(SCHEMA_COLUMN_TIME).getTime();
final byte[] schemaAsBytes =
ByteUtils.toBytes(row.getBytes(SCHEMA_COLUMN_VALUE));
try {
final SchemaEntry entry = fromAvroEntry(decodeSchemaEntry(schemaAsBytes));
entries.add(entry);
if (!getSchemaHash(entry.getSchema()).equals(entry.getHash())) {
LOG.error(String.format("Invalid schema hash table entry with row key %s: "
+ "computed schema hash %s does not match entry %s",
rowKey, getSchemaHash(entry.getSchema()), entry));
}
if (schemaId != entry.getId()) {
LOG.error(
"Inconsistent schema ID table: ID encoded in row key {} does not match entry: {}.",
schemaId, entry);
}
} catch (IOException ioe) {
LOG.error("Unable to decode schema ID table entry for row {}, timestamp {}: {}.",
rowKey, timestamp, ioe);
} catch (AvroRuntimeException are) {
LOG.error("Unable to decode schema ID table entry for row {}, timestamp {}: {}.",
rowKey, timestamp, are);
}
}
LOG.info("Schema ID table has {} rows and {} entries.", idTableRowCounter, entries.size());
return entries;
}
/** {@inheritDoc} */
@Override
public String toString() {
return Objects.toStringHelper(CassandraSchemaTable.class)
.add("uri", mInstanceURI)
.add("state", mState.get())
.toString();
}
}