/**
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.cdk.data.hbase.avro;
import com.cloudera.cdk.data.DatasetException;
import com.cloudera.cdk.data.SchemaNotFoundException;
import com.cloudera.cdk.data.SchemaValidationException;
import com.cloudera.cdk.data.hbase.impl.BaseDao;
import com.cloudera.cdk.data.hbase.impl.BaseEntityMapper;
import com.cloudera.cdk.data.hbase.impl.CompositeBaseDao;
import com.cloudera.cdk.data.hbase.impl.Dao;
import com.cloudera.cdk.data.hbase.impl.EntityMapper;
import com.cloudera.cdk.data.hbase.impl.SchemaManager;
import com.google.common.collect.Lists;
import java.io.InputStream;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.avro.Schema;
import org.apache.avro.specific.SpecificRecord;
import org.apache.hadoop.hbase.client.HTablePool;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* A Dao for Avro's SpecificRecords. In this Dao implementation the entity type
* is a SpecificRecord. This Dao allows us to persist and fetch these
* SpecificRecords to and from HBase.
*
* @param <E>
* The entity type.
*/
public class SpecificAvroDao<E extends SpecificRecord> extends BaseDao<E> {
// Class-wide logger; final so the reference cannot be reassigned.
private static final Logger LOG = LoggerFactory.getLogger(SpecificAvroDao.class);
// Parser used by this class for the entity and key schema strings.
private static final AvroKeyEntitySchemaParser parser = new AvroKeyEntitySchemaParser();
/**
* Construct the SpecificAvroDao from an entity schema string.
* <p>
* The same schema string is handed to buildEntityMapper as both the reader
* schema and the written schema, and the key schema is parsed from that string
* as well.
*
* @param tablePool
* An HTablePool instance to use for connecting to HBase.
* @param tableName
* The name of the table this Dao will read from and write to.
* @param entitySchemaString
* The json string representing the special avro record schema, that
* contains metadata in annotations of the Avro record fields. See
* {@link AvroEntityMapper} for details.
* @param entityClass
* The class of the SpecificRecord this DAO will persist and fetch.
*/
public SpecificAvroDao(HTablePool tablePool, String tableName,
String entitySchemaString, Class<E> entityClass) {
super(tablePool, tableName, buildEntityMapper(entitySchemaString,
entitySchemaString, entityClass));
}
/**
* Construct the SpecificAvroDao from an entity schema stream.
* <p>
* The stream is converted to a string with AvroUtils.inputStreamToString and
* the result is passed to the schema-string constructor.
* NOTE(review): whether the stream is closed after reading depends on
* AvroUtils.inputStreamToString, which is not visible here - confirm.
*
* @param tablePool
* An HTablePool instance to use for connecting to HBase.
* @param tableName
* The name of the table this Dao will read from and write to.
* @param entitySchemaStream
* The json stream representing the special avro record schema, that
* contains metadata in annotations of the Avro record fields. See
* {@link AvroEntityMapper} for details.
* @param entityClass
* The class of the SpecificRecord this DAO will persist and fetch.
*/
public SpecificAvroDao(HTablePool tablePool, String tableName,
InputStream entitySchemaStream, Class<E> entityClass) {
this(tablePool, tableName, AvroUtils
.inputStreamToString(entitySchemaStream), entityClass);
}
/**
* Construct the SpecificAvroDao with a SchemaManager. The entity mapper for
* this Dao is built by a VersionedAvroEntityMapper.Builder configured with the
* schema manager, the table name and the entity name, with the "specific"
* flag set to true (presumably selecting SpecificRecord handling - confirm
* against the builder).
*
* @param tablePool
* An HTablePool instance to use for connecting to HBase.
* @param tableName
* The table name of the managed schema.
* @param entityName
* The entity name of the managed schema.
* @param schemaManager
* The SchemaManager which will be used to query schema information
* from the meta store.
*/
public SpecificAvroDao(HTablePool tablePool, String tableName,
String entityName, SchemaManager schemaManager) {
super(tablePool, tableName, new VersionedAvroEntityMapper.Builder()
.setSchemaManager(schemaManager).setTableName(tableName)
.setEntityName(entityName).setSpecific(true).<E> build());
}
/**
 * Create a CompositeDao, which will return SpecificRecord instances of the
 * given entityClass. That record must be a composition of the records
 * described by the schemas in the subEntitySchemaStrings list.
 *
 * @param tablePool
 *          An HTablePool instance to use for connecting to HBase
 * @param tableName
 *          The table name this dao will read from and write to
 * @param subEntitySchemaStrings
 *          The schemas of the entities that make up the composite. This list
 *          must be in the same order as the fields defined in the composite
 *          record. The full name of each parsed Avro schema is used as the
 *          name of the SpecificRecord class to load for that sub entity.
 * @param entityClass
 *          The class of the SpecificRecord this DAO will persist and fetch.
 * @return The CompositeDao instance.
 * @throws DatasetException
 *           If the class named by a sub entity schema cannot be loaded.
 * @throws SchemaNotFoundException
 *           Not thrown directly here; presumably propagated from schema
 *           handling - TODO confirm.
 * @throws SchemaValidationException
 *           Not thrown directly here; presumably propagated from schema
 *           parsing - TODO confirm.
 */
public static <E extends SpecificRecord, S extends SpecificRecord> Dao<E> buildCompositeDao(
    HTablePool tablePool, String tableName,
    List<String> subEntitySchemaStrings, Class<E> entityClass) {
  List<EntityMapper<S>> entityMappers = SpecificAvroDao
      .<S> buildSubEntityMappers(subEntitySchemaStrings);
  return new SpecificCompositeAvroDao<E, S>(tablePool, tableName,
      entityMappers, entityClass);
}

/**
 * Create a CompositeDao, which will return SpecificRecord instances
 * in a Map container.
 *
 * @param <K>
 *          Unused. Retained so that callers supplying explicit type
 *          arguments keep compiling.
 * @param tablePool
 *          An HTablePool instance to use for connecting to HBase
 * @param tableName
 *          The table name this dao will read from and write to
 * @param subEntitySchemaStrings
 *          The schemas of the entities that make up the composite. The full
 *          name of each parsed Avro schema is used as the name of the
 *          SpecificRecord class to load for that sub entity.
 * @return The CompositeDao instance.
 * @throws DatasetException
 *           If the class named by a sub entity schema cannot be loaded.
 * @throws SchemaNotFoundException
 *           Not thrown directly here; presumably propagated from schema
 *           handling - TODO confirm.
 * @throws SchemaValidationException
 *           Not thrown directly here; presumably propagated from schema
 *           parsing - TODO confirm.
 */
public static <K extends SpecificRecord, S extends SpecificRecord> Dao<
    Map<String, S>> buildCompositeDao(
    HTablePool tablePool, String tableName,
    List<String> subEntitySchemaStrings) {
  List<EntityMapper<S>> entityMappers = SpecificAvroDao
      .<S> buildSubEntityMappers(subEntitySchemaStrings);
  return new SpecificMapCompositeAvroDao<S>(tablePool, tableName, entityMappers);
}

/**
 * Build one entity mapper per sub entity schema string, in list order. Each
 * schema string is used as both the reader and the written schema of its
 * mapper.
 */
@SuppressWarnings("unchecked")
private static <S extends SpecificRecord> List<EntityMapper<S>> buildSubEntityMappers(
    List<String> subEntitySchemaStrings) {
  List<EntityMapper<S>> entityMappers = new ArrayList<EntityMapper<S>>(
      subEntitySchemaStrings.size());
  for (String subEntitySchemaString : subEntitySchemaStrings) {
    String subEntityClassName = parser
        .parseEntitySchema(subEntitySchemaString).getAvroSchema()
        .getFullName();
    Class<S> subEntityClass;
    try {
      // Unchecked: the schema's full name is trusted to name a class of
      // type S; nothing verifies that at this point.
      subEntityClass = (Class<S>) Class.forName(subEntityClassName);
    } catch (ClassNotFoundException e) {
      // Same exception type the rest of this class uses for reflection
      // failures; it is unchecked, like the RuntimeException thrown before.
      throw new DatasetException(e);
    }
    entityMappers.add(SpecificAvroDao.<S> buildEntityMapper(
        subEntitySchemaString, subEntitySchemaString, subEntityClass));
  }
  return entityMappers;
}
/**
 * Create a CompositeDao from sub entity schemas supplied as input streams.
 * Every stream is converted to a schema string with
 * AvroUtils.inputStreamToString, and the resulting list is passed on to
 * buildCompositeDao together with the other arguments.
 *
 * @param tablePool
 *          An HTablePool instance to use for connecting to HBase
 * @param tableName
 *          The table name this dao will read from and write to
 * @param subEntitySchemaStreams
 *          The schema streams of the entities that make up the composite.
 *          This list must be in the same order as the fields defined in the
 *          composite record.
 * @param entityClass
 *          The class of the SpecificRecord this DAO will persist and fetch.
 * @return The CompositeDao instance.
 * @throws SchemaNotFoundException
 *           Not thrown directly here; presumably propagated from schema
 *           handling - TODO confirm.
 * @throws SchemaValidationException
 *           Not thrown directly here; presumably propagated from schema
 *           parsing - TODO confirm.
 */
public static <E extends SpecificRecord, S extends SpecificRecord> Dao<E> buildCompositeDaoWithInputStream(
    HTablePool tablePool, String tableName,
    List<InputStream> subEntitySchemaStreams, Class<E> entityClass) {
  List<String> schemaJsons = new ArrayList<String>();
  for (InputStream schemaStream : subEntitySchemaStreams) {
    String schemaJson = AvroUtils.inputStreamToString(schemaStream);
    schemaJsons.add(schemaJson);
  }
  return buildCompositeDao(tablePool, tableName, schemaJsons, entityClass);
}
/**
 * Create a CompositeDao for a managed schema, which will return
 * SpecificRecord instances of the given entityClass. One versioned entity
 * mapper is built per field of the composite record's schema, using the
 * field's schema name as the managed entity name.
 *
 * @param tablePool
 *          An HTablePool instance to use for connecting to HBase.
 * @param tableName
 *          The table name of the managed schema.
 * @param entityClass
 *          The class that is the composite record, which is made up of fields
 *          referencing the sub records.
 * @param schemaManager
 *          The SchemaManager used to create the entity mappers that will
 *          power this dao.
 * @return The CompositeDao instance.
 * @throws SchemaNotFoundException
 *           Not thrown directly here; presumably raised while building a
 *           versioned mapper - TODO confirm.
 */
public static <K extends SpecificRecord, E extends SpecificRecord, S extends SpecificRecord> Dao<E> buildCompositeDaoWithEntityManager(
    HTablePool tablePool, String tableName, Class<E> entityClass,
    SchemaManager schemaManager) {
  List<Schema.Field> compositeFields = getSchemaFromEntityClass(entityClass)
      .getFields();
  List<EntityMapper<S>> subEntityMappers = new ArrayList<EntityMapper<S>>();
  for (Schema.Field compositeField : compositeFields) {
    // A field may be a plain record or a nullable union wrapping one.
    String subEntityName = getSchemaName(compositeField.schema());
    EntityMapper<S> subEntityMapper = new VersionedAvroEntityMapper.Builder()
        .setSchemaManager(schemaManager).setTableName(tableName)
        .setEntityName(subEntityName).setSpecific(true).<S> build();
    subEntityMappers.add(subEntityMapper);
  }
  return new SpecificCompositeAvroDao<E, S>(tablePool, tableName,
      subEntityMappers, entityClass);
}
/**
 * Resolve the name to use for a schema. A non-union schema yields its own
 * name. A two-branch union in which one branch is NULL yields the name of the
 * other branch.
 *
 * @param schema
 *          The schema to name.
 * @return The resolved schema name.
 * @throws IllegalArgumentException
 *           If the schema is any other kind of union.
 */
private static String getSchemaName(Schema schema) {
  if (schema.getType() != Schema.Type.UNION) {
    return schema.getName();
  }
  List<Schema> branches = schema.getTypes();
  if (branches.size() == 2) {
    Schema first = branches.get(0);
    Schema second = branches.get(1);
    if (first.getType() == Schema.Type.NULL) {
      return second.getName();
    }
    if (second.getType() == Schema.Type.NULL) {
      return first.getName();
    }
  }
  throw new IllegalArgumentException("Unsupported union schema: " + schema);
}
/**
 * Create a CompositeDao for a managed schema, which will return
 * SpecificRecord instances in a Map container. One versioned entity mapper is
 * built per sub entity class, using the name of that class's Avro schema as
 * the managed entity name.
 *
 * @param tablePool
 *          An HTablePool instance to use for connecting to HBase.
 * @param tableName
 *          The table name of the managed schema.
 * @param subEntityClasses
 *          The classes that make up the subentities.
 * @param schemaManager
 *          The SchemaManager used to create the entity mappers that will
 *          power this dao.
 * @return The CompositeDao instance.
 * @throws SchemaNotFoundException
 *           Not thrown directly here; presumably raised while building a
 *           versioned mapper - TODO confirm.
 */
public static <K extends SpecificRecord, S extends SpecificRecord> Dao<Map<String, S>> buildCompositeDaoWithEntityManager(
    HTablePool tablePool, String tableName, List<Class<S>> subEntityClasses,
    SchemaManager schemaManager) {
  List<EntityMapper<S>> subEntityMappers = new ArrayList<EntityMapper<S>>();
  for (Class<S> subEntityClass : subEntityClasses) {
    Schema subEntitySchema = getSchemaFromEntityClass(subEntityClass);
    String managedEntityName = subEntitySchema.getName();
    EntityMapper<S> subEntityMapper = new VersionedAvroEntityMapper.Builder()
        .setSchemaManager(schemaManager).setTableName(tableName)
        .setEntityName(managedEntityName).setSpecific(true).<S> build();
    subEntityMappers.add(subEntityMapper);
  }
  return new SpecificMapCompositeAvroDao<S>(tablePool, tableName,
      subEntityMappers);
}
/**
 * Read the Avro schema of an entity class from its static SCHEMA$ field.
 *
 * @param entityClass
 *          The class declaring the static SCHEMA$ field.
 * @return The schema held by that field.
 * @throws DatasetException
 *           Wrapping any failure to look up, read or cast the field. The
 *           failure is also logged.
 */
private static Schema getSchemaFromEntityClass(Class<?> entityClass) {
  try {
    // The cast stays inside the try so a wrong field type is wrapped too.
    Object schemaValue = entityClass.getDeclaredField("SCHEMA$").get(null);
    return (Schema) schemaValue;
  } catch (Throwable e) {
    String message = "Error getting schema from entity of type: "
        + entityClass.getName();
    LOG.error(message, e);
    throw new DatasetException(e);
  }
}
/**
 * CompositeBaseDao implementation whose composite entity is itself a specific
 * avro record: the sub entity at list position i is written to, and read
 * from, field i of the composite record.
 *
 * @param <E>
 *          The composite entity type this dao fetches and persists
 * @param <S>
 *          The sub entity type held in the fields of the composite
 */
private static class SpecificCompositeAvroDao<E extends SpecificRecord, S extends SpecificRecord>
    extends CompositeBaseDao<E, S> {

  private final Class<E> entityClass;
  private final Constructor<E> entityConstructor;
  private final Schema entitySchema;

  /**
   * Look up the public no-arg constructor and the static SCHEMA$ field of the
   * composite class once, so compose and decompose can reuse them.
   *
   * @throws DatasetException
   *           Wrapping any failure of those reflective lookups.
   */
  public SpecificCompositeAvroDao(HTablePool tablePool, String tableName,
      List<EntityMapper<S>> entityMappers, Class<E> entityClass) {
    super(tablePool, tableName, entityMappers);
    this.entityClass = entityClass;
    Constructor<E> noArgConstructor;
    Schema declaredSchema;
    try {
      noArgConstructor = entityClass.getConstructor();
      declaredSchema = (Schema) entityClass.getDeclaredField("SCHEMA$").get(
          null);
    } catch (Throwable e) {
      LOG.error(
          "Error getting constructor or schema field for entity of type: "
              + entityClass.getName(), e);
      throw new DatasetException(e);
    }
    this.entityConstructor = noArgConstructor;
    this.entitySchema = declaredSchema;
  }

  /**
   * Create a new composite record and store each non-null sub entity in the
   * field whose index equals the sub entity's position in the list.
   */
  @Override
  public E compose(List<S> subEntities) {
    E composite;
    try {
      composite = entityConstructor.newInstance();
    } catch (Throwable e) {
      LOG.error(
          "Error trying to construct entity of type: "
              + entityClass.getName(), e);
      throw new DatasetException(e);
    }
    int position = -1;
    for (S subEntity : subEntities) {
      position++;
      if (subEntity == null) {
        // Null sub entities leave the corresponding field untouched.
        continue;
      }
      composite.put(position, subEntity);
    }
    return composite;
  }

  /**
   * Return the value of every field of the composite record, in field order.
   */
  @SuppressWarnings("unchecked")
  @Override
  public List<S> decompose(E entity) {
    int fieldCount = entitySchema.getFields().size();
    List<S> subEntities = new ArrayList<S>();
    for (int fieldIndex = 0; fieldIndex < fieldCount; fieldIndex++) {
      S subEntity = (S) entity.get(fieldIndex);
      subEntities.add(subEntity);
    }
    return subEntities;
  }
}
/**
 * CompositeBaseDao implementation for Specific avro records where the
 * composite entity is a map. Each sub entity is keyed by the name of the Avro
 * schema of the entity mapper at the same position.
 *
 * @param <S>
 *          The sub entity type held in the map
 */
private static class SpecificMapCompositeAvroDao<
    S extends SpecificRecord>
    extends CompositeBaseDao<Map<String, S>, S> {

  private final List<Schema> subEntitySchemas;

  /**
   * Re-parse the raw schema of every entity mapper and keep the resulting
   * Avro schemas, in mapper order, for naming the map entries.
   */
  public SpecificMapCompositeAvroDao(HTablePool tablePool, String tableName,
      List<EntityMapper<S>> entityMappers) {
    super(tablePool, tableName, entityMappers);
    subEntitySchemas = Lists.newArrayList();
    for (EntityMapper<S> mapper : entityMappers) {
      AvroEntitySchema parsedSchema = parser.parseEntitySchema(mapper
          .getEntitySchema().getRawSchema());
      subEntitySchemas.add(parsedSchema.getAvroSchema());
    }
  }

  /**
   * Put every non-null sub entity into a new map under the name of the schema
   * at the same list position.
   */
  @Override
  public Map<String, S> compose(List<S> entities) {
    Map<String, S> composite = new HashMap<String, S>();
    int position = -1;
    for (S subEntity : entities) {
      position++;
      if (subEntity == null) {
        // Null sub entities get no map entry.
        continue;
      }
      String schemaName = subEntitySchemas.get(position).getName();
      composite.put(schemaName, subEntity);
    }
    return composite;
  }

  /**
   * Look up one map value per known sub entity schema, in schema order. A
   * schema name with no entry in the map contributes whatever Map.get
   * returns for it.
   */
  @Override
  public List<S> decompose(Map<String, S> entity) {
    List<S> subEntities = new ArrayList<S>();
    for (int i = 0; i < subEntitySchemas.size(); i++) {
      String schemaName = subEntitySchemas.get(i).getName();
      subEntities.add(entity.get(schemaName));
    }
    return subEntities;
  }
}
/**
 * Assemble a BaseEntityMapper for a specific record class from its reader and
 * written schema strings. The key schema is parsed from the reader schema
 * string.
 *
 * @param readerSchemaStr
 *          The schema string parsed as the reader entity schema and as the
 *          key schema.
 * @param writtenSchemaStr
 *          The schema string parsed as the written entity schema.
 * @param entityClass
 *          The SpecificRecord class whose string types are merged into the
 *          reader and key schemas.
 * @return The entity mapper built from those schemas.
 */
private static <E extends SpecificRecord> BaseEntityMapper<E> buildEntityMapper(
    String readerSchemaStr, String writtenSchemaStr,
    Class<E> entityClass) {
  // The specific class may have been compiled with a setting that adds the
  // string type to the string fields, which the local or managed schemas do
  // not carry, so it is merged into the parsed reader schema.
  AvroEntitySchema mergedReaderSchema = AvroUtils.mergeSpecificStringTypes(
      entityClass, parser.parseEntitySchema(readerSchemaStr));
  AvroEntitySchema writerSchema = parser.parseEntitySchema(writtenSchemaStr);

  AvroEntitySerDe<E> entitySerDe = new AvroEntitySerDe<E>(
      new AvroEntityComposer<E>(mergedReaderSchema, true),
      mergedReaderSchema, writerSchema, true);

  // The key schema gets the same string-type merge as the reader schema.
  AvroKeySchema mergedKeySchema = AvroUtils.mergeSpecificStringTypes(
      entityClass, parser.parseKeySchema(readerSchemaStr));
  AvroKeySerDe keySerDe = new AvroKeySerDe(mergedKeySchema.getAvroSchema(),
      mergedKeySchema.getPartitionStrategy());

  return new BaseEntityMapper<E>(mergedKeySchema, mergedReaderSchema,
      keySerDe, entitySerDe);
}
}