Package org.apache.cassandra.hadoop.hive.metastore

Source Code of org.apache.cassandra.hadoop.hive.metastore.SchemaManagerService

package org.apache.cassandra.hadoop.hive.metastore;

import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;

import org.apache.cassandra.config.CFMetaData;
import org.apache.cassandra.config.ConfigurationException;
import org.apache.cassandra.config.KSMetaData;
import org.apache.cassandra.db.ColumnFamilyType;
import org.apache.cassandra.db.marshal.AbstractType;
import org.apache.cassandra.db.marshal.AbstractUUIDType;
import org.apache.cassandra.db.marshal.AsciiType;
import org.apache.cassandra.db.marshal.BytesType;
import org.apache.cassandra.db.marshal.IntegerType;
import org.apache.cassandra.db.marshal.LongType;
import org.apache.cassandra.db.marshal.TypeParser;
import org.apache.cassandra.db.marshal.UTF8Type;
import org.apache.cassandra.thrift.CfDef;
import org.apache.cassandra.thrift.ColumnDef;
import org.apache.cassandra.thrift.InvalidRequestException;
import org.apache.cassandra.thrift.KsDef;
import org.apache.cassandra.thrift.NotFoundException;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Service encapsulating schema management of the Brisk platform. This service deals
* with both the Hive meta store schema as well as maintaining the mappings of
* column family and keyspace objects to meta store tables and databases respectively.
*
* @author zznate
*/
public class SchemaManagerService
{

    private static Logger log = LoggerFactory.getLogger(SchemaManagerService.class);
    private CassandraClientHolder cassandraClientHolder;
    private Configuration configuration;
    private CassandraHiveMetaStore cassandraHiveMetaStore;
    private Warehouse warehouse;

    public SchemaManagerService(CassandraHiveMetaStore cassandraHiveMetaStore, Configuration conf)
    {
        this.cassandraHiveMetaStore = cassandraHiveMetaStore;
        this.configuration = conf;
        this.cassandraClientHolder = new CassandraClientHolder(configuration);
        try {
            this.warehouse = new Warehouse(configuration);
        } catch (MetaException me) {
            throw new CassandraHiveMetaStoreException("Could not start schemaManagerService.", me);
        }
    }

    /**
     * Create the meta store keyspace if it does not already exist.
     * @return true if the keyspace did no exist and the creation was successful. False if the
     * keyspace already existed.
     * @throws {@link CassandraHiveMetaStoreException} wrapping the underlying exception if we
     * failed to create the keyspace.
     */
    public boolean createMetaStoreIfNeeded()
    {
        if ( configuration.getBoolean("cassandra.skipMetaStoreCreate", false) )
            return false;
        try
        {
            cassandraClientHolder.applyKeyspace();
            return false;
        }
        catch (CassandraHiveMetaStoreException chmse)
        {
            log.debug("Attempting to create meta store keyspace: First set_keyspace call failed. Sleeping.");
        }

        //Sleep a random amount of time to stagger ks creations on many nodes
        try
        {
            Thread.sleep(5000);
        }
        catch (InterruptedException e1)
        {

        }

        //check again...
        try
        {
            cassandraClientHolder.applyKeyspace();
            return false;
        }
        catch (CassandraHiveMetaStoreException chmse)
        {
            log.debug("Attempting to create meta store keyspace after sleep.");
        }

        CfDef cf = new CfDef(cassandraClientHolder.getKeyspaceName(),
                cassandraClientHolder.getColumnFamily());
        cf.setKey_validation_class("UTF8Type");
        cf.setComparator_type("UTF8Type");
        KsDef ks = new KsDef(cassandraClientHolder.getKeyspaceName(),
                "org.apache.cassandra.locator.SimpleStrategy",
                Arrays.asList(cf));
        ks.setStrategy_options(KSMetaData.optsWithRF(configuration.getInt(CassandraClientHolder.CONF_PARAM_REPLICATION_FACTOR, 1)));
        try
        {
            cassandraClientHolder.getClient().system_add_keyspace(ks);
            return true;
        }
        catch (Exception e)
        {
            throw new CassandraHiveMetaStoreException("Could not create Hive MetaStore database: " + e.getMessage(), e);
        }
    }

    /**
     * Returns a List of Keyspace definitions that are not yet created as 'databases'
     * in the Hive meta store. The list of keyspaces required for brisk operation are ignored.
     * @return
     */
    public List<KsDef> findUnmappedKeyspaces()
    {
        List<KsDef> defs;
        try
        {
            defs = cassandraClientHolder.getClient().describe_keyspaces();

            for (Iterator<KsDef> iterator = defs.iterator(); iterator.hasNext();)
            {
                KsDef ksDef = iterator.next();
                String name = ksDef.name;
                log.debug("Found ksDef name: {}",name);
                if ( StringUtils.indexOfAny(name, SYSTEM_KEYSPACES) > -1 || isKeyspaceMapped(name))
                {
                    log.debug("REMOVING ksDef name from unmapped List: {}",name);
                    iterator.remove();
                }
            }
        }
        catch (Exception ex)
        {
            throw new CassandraHiveMetaStoreException("Could not retrieve unmapped keyspaces",ex);
        }
        return defs;
    }

    public KsDef getKeyspaceForDatabaseName(String databaseName)
    {
        try
        {
            return cassandraClientHolder.getClient().describe_keyspace(databaseName);
        }
        catch (NotFoundException e)
        {
            return null;
        }
        catch (Exception ex)
        {
            throw new CassandraHiveMetaStoreException("Problem finding Keyspace for databaseName " + databaseName, ex);
        }
    }


    /**
     * Returns true if this keyspaceName returns a Database via
     * {@link CassandraHiveMetaStore#getDatabase(String)}
     * @param keyspaceName
     * @return
     */
    public boolean isKeyspaceMapped(String keyspaceName)
    {
        try
        {
            return cassandraHiveMetaStore.getDatabase(keyspaceName) != null;
        }
        catch (NoSuchObjectException e)
        {
            return false;
        }
    }

    /**
     * Creates the database based on the Keyspace's name. The tables
     * are created similarly based off the names of the column families.
     * Column family meta data will be used to define the table's fields.
     *
     * @param ksDef
     */
    public void createKeyspaceSchema(KsDef ksDef)
    {
        try
        {
            cassandraHiveMetaStore.createDatabase(buildDatabase(ksDef));
            for (CfDef cfDef : ksDef.cf_defs)
            {
                cassandraHiveMetaStore.createTable(buildTable(cfDef));
            }

        }
        catch (InvalidObjectException ioe)
        {
            throw new CassandraHiveMetaStoreException("Could not create keyspace schema.", ioe);
        }
        catch (MetaException me)
        {
            throw new CassandraHiveMetaStoreException("Problem persisting metadata", me);
        }

    }

    public void createKeyspaceSchemasIfNeeded()
    {
        if ( getAutoCreateSchema() )
        {
            List<KsDef> keyspaces = findUnmappedKeyspaces();
            for (KsDef ksDef : keyspaces)
            {
                createKeyspaceSchema(ksDef);
            }
        }
    }

    /**
     * Compares the column families in the keyspace with what we have in hive so far,
     * creating tables for any that do not exist as such already.
     * @param ksDef
     */
    public void createNewColumnFamilyTables(KsDef ksDef)
    {
        for (CfDef cfDef : ksDef.cf_defs)
        {
            try
            {
                if ( cassandraHiveMetaStore.getTable(cfDef.keyspace, cfDef.name) == null)
                {
                    if ( cfDef.getColumn_metadataSize() > 0 )
                    {
                        cassandraHiveMetaStore.createTable(buildTable(cfDef));
                    }
                }
            }
            catch (InvalidObjectException ioe)
            {
                throw new CassandraHiveMetaStoreException("Could not create table for CF: " + cfDef.name, ioe);
            }
            catch (MetaException me)
            {
                throw new CassandraHiveMetaStoreException("Problem persisting metadata for CF: " + cfDef.name, me);
            }
        }
    }

    /**
     * Check to see if we are configured to auto create schema
     * @return the value of 'cassandra.autoCreateHiveSchema' according to
     * the configuration. False by default.
     */
    public boolean getAutoCreateSchema()
    {
        return configuration.getBoolean("cassandra.autoCreateHiveSchema", false);
    }

    private Database buildDatabase(KsDef ksDef)
    {
        Database database = new Database();
        try
        {
            database.setLocationUri(warehouse.getDefaultDatabasePath(ksDef.name).toString());
        }
        catch (MetaException me)
        {
            throw new CassandraHiveMetaStoreException("Could not determine storage URI of database", me);
        }
        database.setName(ksDef.name);
        return database;
    }

    private Table buildTable(CfDef cfDef)
    {
        Table table = new Table();
        table.setDbName(cfDef.keyspace);
        table.setTableName(cfDef.name);
        table.setTableType(TableType.EXTERNAL_TABLE.toString());
        table.putToParameters("EXTERNAL", "TRUE");
        table.putToParameters("cassandra.ks.name", cfDef.keyspace);
        table.putToParameters("cassandra.cf.name", cfDef.name);
        table.putToParameters("cassandra.slice.predicate.size", "100");
        table.putToParameters("storage_handler", "org.apache.hadoop.hive.cassandra.CassandraStorageHandler");
        table.setPartitionKeys(new ArrayList<FieldSchema>());
        // cassandra.column.mapping

        StorageDescriptor sd = new StorageDescriptor();
        sd.setInputFormat("org.apache.hadoop.hive.cassandra.input.HiveCassandraStandardColumnInputFormat");
        sd.setOutputFormat("org.apache.hadoop.hive.cassandra.output.HiveCassandraOutputFormat");
        sd.setParameters(new HashMap<String, String>());
        try {
            sd.setLocation(warehouse.getDefaultTablePath(cfDef.keyspace, cfDef.name).toString());
        } catch (MetaException me) {
            log.error("could not build path information correctly",me);
        }
        SerDeInfo serde = new SerDeInfo();
        serde.setSerializationLib("org.apache.hadoop.hive.cassandra.serde.CassandraColumnSerDe");
        serde.putToParameters("serialization.format", "1");
        StringBuilder mapping = new StringBuilder();
        StringBuilder validator = new StringBuilder();
        try {
            CFMetaData cfm = CFMetaData.fromThrift(cfDef);

            AbstractType keyValidator = cfDef.key_validation_class != null ? TypeParser.parse(cfDef.key_validation_class) : BytesType.instance;

            addTypeToStorageDescriptor(sd, ByteBufferUtil.bytes("row_key"), keyValidator, keyValidator);
            mapping.append(":key");
            validator.append(keyValidator.toString());

            for (ColumnDef column : cfDef.getColumn_metadata() )
            {
                addTypeToStorageDescriptor(sd, column.name, TypeParser.parse(cfDef.comparator_type),  TypeParser.parse(column.getValidation_class()));
                try {
                  mapping.append(",");
                  mapping.append(ByteBufferUtil.string(column.name));
                  validator.append(",");
                  validator.append(column.getValidation_class());
                } catch (CharacterCodingException e) {
                  log.error("could not build column mapping correctly", e);
                }
            }

            serde.putToParameters("cassandra.columns.mapping", mapping.toString());
            serde.putToParameters("cassandra.cf.validatorType", validator.toString());
            sd.setSerdeInfo(serde);
        } catch (ConfigurationException ce) {
            throw new CassandraHiveMetaStoreException("Problem converting comparator type: " + cfDef.comparator_type, ce);
        } catch (InvalidRequestException ire) {
            throw new CassandraHiveMetaStoreException("Problem parsing CfDef: " + cfDef.name, ire);
        }
        table.setSd(sd);
        if ( log.isDebugEnabled() )
            log.debug("constructed table for CF:{} {}", cfDef.name, table.toString());
        return table;
    }

    private static String createMappingArray(CfDef cfDef)
    {
        StringBuilder sb = new StringBuilder();
        sb.append(cfDef.getKey_validation_class() != null ? cfDef.getKey_validation_class() : "BytesType")
        .append(",")
        .append(cfDef.getSubcomparator_type() != null ? cfDef.getSubcomparator_type() : "")
        .append(",")
        .append(cfDef.getComparator_type())
        .append(",")
        .append(cfDef.getDefault_validation_class() != null ? cfDef.getDefault_validation_class() : "");
        return sb.toString();
    }

    /**
     * Deduce the type information based on column validator, adding a FieldSchema to the provided
     * StorageDescriptor
     * @param sd
     * @param column
     */
    private void addTypeToStorageDescriptor(StorageDescriptor sd,
            ByteBuffer columnName, AbstractType comparator,
            AbstractType<?> validationType)
    {
        if ( validationType instanceof BytesType )
        {
            sd.addToCols(new FieldSchema(comparator.getString(columnName), "string", buildTypeComment(validationType)));

        } else if ( validationType instanceof UTF8Type || validationType instanceof AsciiType )
        {
            sd.addToCols(new FieldSchema(comparator.getString(columnName), "string", buildTypeComment(validationType)));
        } else if ( validationType instanceof LongType )
        {
            sd.addToCols(new FieldSchema(comparator.getString(columnName), "int", buildTypeComment(validationType)));
        } else if ( validationType instanceof AbstractUUIDType )
        {
            sd.addToCols(new FieldSchema(comparator.getString(columnName), "string", buildTypeComment(validationType)));
        } else if ( validationType instanceof IntegerType )
        {
            sd.addToCols(new FieldSchema(comparator.getString(columnName), "bigint", buildTypeComment(validationType)));
        } else {
            // assume bytes
            sd.addToCols(new FieldSchema(comparator.getString(columnName), "string", buildTypeComment(validationType)));
        }
    }

    private static final String buildTypeComment(AbstractType type)
    {
        return String.format("Auto-created based on %s from Column Family meta data", type.getClass().getName());
    }
    /**
     * Contains 'system', as well as keyspace names for meta store, and Cassandra File System
     * FIXME: need to ref. the configuration value of the meta store keyspace. Should also coincide
     * with BRISK-190
     */
    public static final String[] SYSTEM_KEYSPACES = new String[] {
        "system", CassandraClientHolder.DEF_META_STORE_KEYSPACE, "cfs"
    };

}
TOP

Related Classes of org.apache.cassandra.hadoop.hive.metastore.SchemaManagerService

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.