/**
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.cdk.data.hbase.tool;
import com.cloudera.cdk.data.DatasetException;
import com.cloudera.cdk.data.SchemaValidationException;
import com.cloudera.cdk.data.hbase.avro.AvroEntitySchema;
import com.cloudera.cdk.data.hbase.avro.AvroKeyEntitySchemaParser;
import com.cloudera.cdk.data.hbase.avro.AvroKeySchema;
import com.cloudera.cdk.data.hbase.avro.AvroUtils;
import com.cloudera.cdk.data.hbase.impl.Constants;
import com.cloudera.cdk.data.hbase.impl.KeySchema;
import com.cloudera.cdk.data.hbase.impl.SchemaManager;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.SuffixFileFilter;
import org.apache.commons.io.filefilter.TrueFileFilter;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.JsonParser;
import org.codehaus.jackson.map.ObjectMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Utility class for managing Managed Schemas in HBase Common.
*/
public class SchemaTool {
private static final Logger LOG = LoggerFactory.getLogger(SchemaTool.class);
private static final String CLASSPATH_PREFIX = "classpath:";
private static final AvroKeyEntitySchemaParser parser = new AvroKeyEntitySchemaParser();
private static final ObjectMapper mapper = new ObjectMapper();
private static final JsonFactory factory = mapper.getJsonFactory();
private final SchemaManager schemaManager;
private final HBaseAdmin hbaseAdmin;
public SchemaTool(HBaseAdmin hbaseAdmin, SchemaManager entityManager) {
this.hbaseAdmin = hbaseAdmin;
this.schemaManager = entityManager;
}
/**
* Scans the schemaDirectory for avro schemas, and creates or migrates HBase
* Common managed schemas managed by this instances entity manager.
*
* @param schemaDirectory
* The directory to recursively scan for avro schema files. This
* directory can be a directory on the classpath, including a
* directory that is embeddded in a jar on the classpath. In both of
* those cases, the schemaDirectory should be prefixed with
* classpath:
* @param createTableAndFamilies
* If true, will create the table for each schema if it doesn't
* exist, and will create families if they don't exist.
*/
public void createOrMigrateSchemaDirectory(String schemaDirectory,
boolean createTableAndFamilies) {
List<String> schemaStrings;
if (schemaDirectory.startsWith(CLASSPATH_PREFIX)) {
URL dirURL = getClass().getClassLoader().getResource(
schemaDirectory.substring(CLASSPATH_PREFIX.length()));
if (dirURL != null && dirURL.getProtocol().equals("file")) {
try {
schemaStrings = getSchemaStringsFromDir(new File(dirURL.toURI()));
} catch (URISyntaxException e) {
throw new DatasetException(e);
}
} else if (dirURL != null && dirURL.getProtocol().equals("jar")) {
String jarPath = dirURL.getPath().substring(5,
dirURL.getPath().indexOf("!"));
schemaStrings = getSchemaStringsFromJar(jarPath,
schemaDirectory.substring(CLASSPATH_PREFIX.length()));
} else {
String msg = "Could not find classpath resource: " + schemaDirectory;
LOG.error(msg);
throw new DatasetException(msg);
}
} else {
schemaStrings = getSchemaStringsFromDir(new File(schemaDirectory));
}
Map<String, String> tableKeySchemaMap = new HashMap<String, String>();
Map<String, List<String>> tableEntitySchemaMap = new HashMap<String, List<String>>();
for (String schemaString : schemaStrings) {
String name = getEntityNameFromSchemaString(schemaString);
List<String> tables = getTablesFromSchemaString(schemaString);
if (name.endsWith("StorageKey")) {
for (String table : tables) {
if (tableKeySchemaMap.containsKey(table)) {
String msg = "Multiple keys for table: " + table;
LOG.error(msg);
throw new SchemaValidationException(msg);
}
LOG.debug("Adding key to tableKeySchemaMap for table: " + table
+ ". " + schemaString);
tableKeySchemaMap.put(table, schemaString);
}
} else {
for (String table : tables) {
if (tableEntitySchemaMap.containsKey(table)) {
tableEntitySchemaMap.get(table).add(schemaString);
} else {
List<String> entityList = new ArrayList<String>();
entityList.add(schemaString);
tableEntitySchemaMap.put(table, entityList);
}
}
}
}
for (Entry<String, List<String>> entry : tableEntitySchemaMap.entrySet()) {
String table = entry.getKey();
List<String> entitySchemas = entry.getValue();
if (!tableKeySchemaMap.containsKey(table)) {
String msg = "No StorageKey Schema For Table: " + table;
LOG.error(msg);
throw new DatasetException(msg);
}
if (entitySchemas.size() == 0) {
String msg = "StorageKey, but no entity schemas for Table: " + table;
LOG.error(msg);
throw new SchemaValidationException(msg);
}
for (String entitySchema : entry.getValue()) {
createOrMigrateSchema(table, entitySchema, createTableAndFamilies);
}
}
}
/**
* Creates a new managed schema, or migrates an existing one if one exists for
* the table name, entity name pair.
*
* @param tableName
* The name of the table we'll be creating or migrating a schema for.
* @param keySchemaFilePath
* The absolute file path to the key schema file.
* @param entitySchemaFilePath
* The absolute file path to the entity schema file.
* @param createTableAndFamilies
* If true, will create the table for this schema if it doesn't
* exist, and will create families if they don't exist.
*/
public void createOrMigrateSchemaFile(String tableName,
String entitySchemaFilePath, boolean createTableAndFamilies) {
createOrMigrateSchemaFile(tableName, new File(entitySchemaFilePath),
createTableAndFamilies);
}
/**
* Creates a new managed schema, or migrates an existing one if one exists for
* the table name, entity name pair.
*
* @param tableName
* The name of the table we'll be creating or migrating a schema for.
* @param keySchemaFile
* The key schema file.
* @param entitySchemaFile
* The entity schema file.
* @param createTableAndFamilies
* If true, will create the table for this schema if it doesn't
* exist, and will create families if they don't exist.
*/
public void createOrMigrateSchemaFile(String tableName,
File entitySchemaFile, boolean createTableAndFamilies) {
createOrMigrateSchema(tableName, getSchemaStringFromFile(entitySchemaFile),
createTableAndFamilies);
}
/**
* Creates a new managed schema, or migrates an existing one if one exists for
* the table name, entity name pair.
*
* @param tableName
* The name of the table we'll be creating or migrating a schema for.
* @param keySchemaString
* The key schema
* @param entitySchemaString
* The entity schema
* @param createTableAndFamilies
* If true, will create the table for this schema if it doesn't
* exist, and will create families if they don't exist.
*/
public void createOrMigrateSchema(String tableName,
String entitySchemaString, boolean createTableAndFamilies) {
String entityName = getEntityNameFromSchemaString(entitySchemaString);
AvroEntitySchema entitySchema = parser
.parseEntitySchema(entitySchemaString);
AvroKeySchema keySchema = parser.parseKeySchema(entitySchemaString);
if (schemaManager.hasManagedSchema(tableName, entityName)) {
KeySchema currentKeySchema = schemaManager.getKeySchema(tableName,
entityName);
if (!keySchema.equals(currentKeySchema)) {
String msg = "Migrating schema with different keys. Current: "
+ currentKeySchema.getRawSchema() + " New: "
+ keySchema.getRawSchema();
LOG.error(msg);
throw new SchemaValidationException(msg);
}
if (schemaManager.getEntityVersion(tableName, entityName, entitySchema) == -1) {
LOG.info("Migrating Schema: (" + tableName + ", " + entityName + ")");
schemaManager.migrateSchema(tableName, entityName, entitySchemaString);
} else {
// don't set createTableAndFamilies to false, becasue we may still need
// to update the table to support what exists in the meta store.
LOG.info("Schema hasn't changed, not migrating: (" + tableName + ", "
+ entityName + ")");
}
} else {
LOG.info("Creating Schema: (" + tableName + ", " + entityName + ")");
parser.parseEntitySchema(entitySchemaString).getRequiredColumnFamilies();
schemaManager.createSchema(tableName, entityName, entitySchemaString,
"com.cloudera.cdk.data.hbase.avro.AvroKeyEntitySchemaParser",
"com.cloudera.cdk.data.hbase.avro.AvroKeySerDe",
"com.cloudera.cdk.data.hbase.avro.AvroEntitySerDe");
}
if (createTableAndFamilies) {
try {
if (!hbaseAdmin.tableExists(tableName)) {
HTableDescriptor desc = new HTableDescriptor(tableName);
desc.addFamily(new HColumnDescriptor(Constants.SYS_COL_FAMILY));
desc.addFamily(new HColumnDescriptor(Constants.OBSERVABLE_COL_FAMILY));
for (String columnFamily : entitySchema.getRequiredColumnFamilies()) {
desc.addFamily(new HColumnDescriptor(columnFamily));
}
hbaseAdmin.createTable(desc);
} else {
Set<String> familiesToAdd = entitySchema.getRequiredColumnFamilies();
familiesToAdd.add(new String(Constants.SYS_COL_FAMILY));
familiesToAdd.add(new String(Constants.OBSERVABLE_COL_FAMILY));
HTableDescriptor desc = hbaseAdmin.getTableDescriptor(tableName
.getBytes());
for (HColumnDescriptor columnDesc : desc.getColumnFamilies()) {
String familyName = columnDesc.getNameAsString();
if (familiesToAdd.contains(familyName)) {
familiesToAdd.remove(familyName);
}
}
if (familiesToAdd.size() > 0) {
hbaseAdmin.disableTable(tableName);
try {
for (String family : familiesToAdd) {
hbaseAdmin.addColumn(tableName, new HColumnDescriptor(family));
}
} finally {
hbaseAdmin.enableTable(tableName);
}
}
}
} catch (IOException e) {
throw new DatasetException(e);
}
}
}
/**
* Will return the contents of schemaFile as a string
*
* @param schemaFile
* The file who's contents should be returned.
* @return The contents of schemaFile
*/
private String getSchemaStringFromFile(File schemaFile) {
String schemaString;
FileInputStream fis = null;
try {
fis = new FileInputStream(schemaFile);
schemaString = AvroUtils.inputStreamToString(fis);
} catch (IOException e) {
throw new DatasetException(e);
} finally {
if (fis != null) {
try {
fis.close();
} catch (IOException e) {
}
}
}
return schemaString;
}
private List<String> getTablesFromSchemaString(String schema) {
JsonNode node;
try {
JsonParser jp = factory.createJsonParser(schema);
node = mapper.readTree(jp);
if (node.get("tables") == null) {
return new ArrayList<String>();
}
List<String> result = new ArrayList<String>(node.get("tables").size());
for (Iterator<JsonNode> it = node.get("tables").getElements(); it
.hasNext();) {
result.add(it.next().getTextValue());
}
return result;
} catch (JsonParseException e) {
throw new SchemaValidationException(e);
} catch (IOException e) {
throw new SchemaValidationException(e);
}
}
private String getEntityNameFromSchemaString(String schema) {
JsonNode node;
try {
JsonParser jp = factory.createJsonParser(schema);
node = mapper.readTree(jp);
if (node.get("name") == null) {
return null;
}
return node.get("name").getTextValue();
} catch (JsonParseException e) {
throw new SchemaValidationException(e);
} catch (IOException e) {
throw new SchemaValidationException(e);
}
}
/**
* Gets the list of HBase Common Avro schema strings from dir. It recursively
* searches dir to find files that end in .avsc to locate those strings.
*
* @param dir
* The dir to recursively search for schema strings
* @return The list of schema strings
*/
private List<String> getSchemaStringsFromDir(File dir) {
List<String> schemaStrings = new ArrayList<String>();
Collection<File> schemaFiles = FileUtils.listFiles(dir,
new SuffixFileFilter(".avsc"), TrueFileFilter.INSTANCE);
for (File schemaFile : schemaFiles) {
schemaStrings.add(getSchemaStringFromFile(schemaFile));
}
return schemaStrings;
}
/**
* Gets the list of HBase Common Avro schema strings from a directory in the
* Jar. It recursively searches the directory in the jar to find files that
* end in .avsc to locate thos strings.
*
* @param jarPath
* The path to the jar to search
* @param directoryPath
* The directory in the jar to find avro schema strings
* @return The list of schema strings.
*/
private List<String> getSchemaStringsFromJar(String jarPath,
String directoryPath) {
LOG.info("Getting schema strings in: " + directoryPath + ", from jar: "
+ jarPath);
JarFile jar;
try {
jar = new JarFile(URLDecoder.decode(jarPath, "UTF-8"));
} catch (UnsupportedEncodingException e) {
throw new DatasetException(e);
} catch (IOException e) {
throw new DatasetException(e);
}
Enumeration<JarEntry> entries = jar.entries();
List<String> schemaStrings = new ArrayList<String>();
while (entries.hasMoreElements()) {
JarEntry jarEntry = entries.nextElement();
if (jarEntry.getName().startsWith(directoryPath)
&& jarEntry.getName().endsWith(".avsc")) {
LOG.info("Found schema: " + jarEntry.getName());
InputStream inputStream;
try {
inputStream = jar.getInputStream(jarEntry);
} catch (IOException e) {
throw new DatasetException(e);
}
String schemaString = AvroUtils.inputStreamToString(inputStream);
schemaStrings.add(schemaString);
}
}
return schemaStrings;
}
}