Package com.cloudera.cdk.data.hbase

Source Code of com.cloudera.cdk.data.hbase.HBaseMetadataProvider

/**
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.cdk.data.hbase;

import com.cloudera.cdk.data.DatasetDescriptor;
import com.cloudera.cdk.data.DatasetException;
import com.cloudera.cdk.data.MetadataProviderException;
import com.cloudera.cdk.data.PartitionStrategy;
import com.cloudera.cdk.data.hbase.avro.AvroEntitySchema;
import com.cloudera.cdk.data.hbase.avro.AvroKeyEntitySchemaParser;
import com.cloudera.cdk.data.hbase.impl.Constants;
import com.cloudera.cdk.data.hbase.impl.EntitySchema;
import com.cloudera.cdk.data.hbase.impl.SchemaManager;
import com.cloudera.cdk.data.spi.AbstractMetadataProvider;

import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class HBaseMetadataProvider extends AbstractMetadataProvider {

  private static final Logger logger = LoggerFactory
      .getLogger(HBaseMetadataProvider.class);

  private HBaseAdmin hbaseAdmin;
  private SchemaManager schemaManager;

  public HBaseMetadataProvider(HBaseAdmin hbaseAdmin, SchemaManager schemaManager) {
    this.hbaseAdmin = hbaseAdmin;
    this.schemaManager = schemaManager;
  }

  @Override
  public DatasetDescriptor create(String name, DatasetDescriptor descriptor) {

    try {
      String managedSchemaName = "managed_schemas"; // TODO: allow table to be specified
      if (!hbaseAdmin.tableExists(managedSchemaName)) {
        HTableDescriptor table = new HTableDescriptor(managedSchemaName);
        table.addFamily(new HColumnDescriptor("meta"));
        table.addFamily(new HColumnDescriptor("schema"));
        table.addFamily(new HColumnDescriptor("_s"));
        hbaseAdmin.createTable(table);
      }
    } catch (IOException e) {
      throw new DatasetException(e);
    }

    String entitySchemaString = descriptor.getSchema().toString(true);

    AvroKeyEntitySchemaParser parser = new AvroKeyEntitySchemaParser();
    AvroEntitySchema entitySchema = parser.parseEntitySchema(entitySchemaString);

    String tableName = getTableName(name);
    String entityName = getEntityName(name);

    schemaManager.refreshManagedSchemaCache(tableName, entityName);
    schemaManager.createSchema(tableName, entityName,
        entitySchemaString,
        "com.cloudera.cdk.data.hbase.avro.AvroKeyEntitySchemaParser",
        "com.cloudera.cdk.data.hbase.avro.AvroKeySerDe",
        "com.cloudera.cdk.data.hbase.avro.AvroEntitySerDe");

    try {
      if (!hbaseAdmin.tableExists(tableName)) {
        HTableDescriptor desc = new HTableDescriptor(tableName);
        desc.addFamily(new HColumnDescriptor(Constants.SYS_COL_FAMILY));
        desc.addFamily(new HColumnDescriptor(Constants.OBSERVABLE_COL_FAMILY));
        for (String columnFamily : entitySchema.getRequiredColumnFamilies()) {
          desc.addFamily(new HColumnDescriptor(columnFamily));
        }
        hbaseAdmin.createTable(desc);
      } else {
        Set<String> familiesToAdd = entitySchema.getRequiredColumnFamilies();
        familiesToAdd.add(new String(Constants.SYS_COL_FAMILY));
        familiesToAdd.add(new String(Constants.OBSERVABLE_COL_FAMILY));
        HTableDescriptor desc = hbaseAdmin.getTableDescriptor(tableName
            .getBytes());
        for (HColumnDescriptor columnDesc : desc.getColumnFamilies()) {
          String familyName = columnDesc.getNameAsString();
          if (familiesToAdd.contains(familyName)) {
            familiesToAdd.remove(familyName);
          }
        }
        if (familiesToAdd.size() > 0) {
          hbaseAdmin.disableTable(tableName);
          try {
            for (String family : familiesToAdd) {
              hbaseAdmin.addColumn(tableName, new HColumnDescriptor(family));
            }
          } finally {
            hbaseAdmin.enableTable(tableName);
          }
        }
      }
    } catch (IOException e) {
      throw new DatasetException(e);
    }
    return withPartitionStrategy(descriptor);
  }

  @Override
  public DatasetDescriptor update(String name, DatasetDescriptor descriptor) {
    String tableName = getTableName(name);
    String entityName = getEntityName(name);
    schemaManager.refreshManagedSchemaCache(tableName, entityName);
    String schemaString = descriptor.getSchema().toString();
    AvroKeyEntitySchemaParser parser = new AvroKeyEntitySchemaParser();
    EntitySchema entitySchema = parser.parseEntitySchema(schemaString);
    if (schemaManager.getEntityVersion(tableName, entityName, entitySchema) == -1) {
      schemaManager.migrateSchema(tableName, entityName, schemaString);
    } else {
      logger.info("Schema hasn't changed, not migrating: (" + name + ")");
    }
    return withPartitionStrategy(descriptor);
  }

  @SuppressWarnings("deprecation")
  @Override
  public DatasetDescriptor load(String name) {
    if (!exists(name)) {
      throw new com.cloudera.cdk.data.NoSuchDatasetException("No such dataset: " + name);
    }
    String tableName = getTableName(name);
    String entityName = getEntityName(name);
    return getDatasetDescriptor(schemaManager.getEntitySchema(tableName, entityName).getRawSchema());
  }

  @Override
  public boolean delete(String name) {
    DatasetDescriptor descriptor = load(name);
    String tableName = getTableName(name);
    String entityName = getEntityName(name);

    schemaManager.deleteSchema(tableName, entityName);

    String entitySchemaString = descriptor.getSchema().toString(true);

    AvroKeyEntitySchemaParser parser = new AvroKeyEntitySchemaParser();
    AvroEntitySchema entitySchema = parser.parseEntitySchema(entitySchemaString);

    // TODO: this may delete columns for other entities if they share column families
    // TODO: https://issues.cloudera.org/browse/CDK-145, https://issues.cloudera.org/browse/CDK-146
    for (String columnFamily : entitySchema.getRequiredColumnFamilies()) {
      try {
        hbaseAdmin.disableTable(tableName);
        try {
          hbaseAdmin.deleteColumn(tableName, columnFamily);
        } finally {
          hbaseAdmin.enableTable(tableName);
        }
      } catch (IOException e) {
        throw new MetadataProviderException(e);
      }
    }
    return true;
  }

  @Override
  public boolean exists(String name) {
    String tableName = getTableName(name);
    String entityName = getEntityName(name);
    schemaManager.refreshManagedSchemaCache(tableName, entityName);
    return schemaManager.hasManagedSchema(tableName, entityName);
  }

  /**
   * Listing datasets is not implemented by this provider.
   *
   * @return never returns normally
   * @throws UnsupportedOperationException always
   */
  public Collection<String> list() {
    throw new UnsupportedOperationException();
  }

  static String getTableName(String name) {
    // TODO: change to use namespace (CDK-140)
    if (name.contains(".")) {
      return name.substring(0, name.indexOf('.'));
    }
    return name;
  }

  static String getEntityName(String name) {
    return name.substring(name.indexOf('.') + 1);
  }

  private static DatasetDescriptor getDatasetDescriptor(String schemaString) {
    AvroKeyEntitySchemaParser parser = new AvroKeyEntitySchemaParser();
    PartitionStrategy partitionStrategy = parser.parseKeySchema(schemaString)
        .getPartitionStrategy();
    return new DatasetDescriptor.Builder()
        .schemaLiteral(schemaString)
        .partitionStrategy(partitionStrategy)
        .build();
  }

  // TODO: move the logic of parsing keys to DatasetDescriptor itself
  private static DatasetDescriptor withPartitionStrategy(DatasetDescriptor descriptor) {
    AvroKeyEntitySchemaParser parser = new AvroKeyEntitySchemaParser();
    PartitionStrategy partitionStrategy = parser.parseKeySchema(descriptor.getSchema().toString())
        .getPartitionStrategy();
    return new DatasetDescriptor.Builder()
        .schema(descriptor.getSchema())
        .partitionStrategy(partitionStrategy)
        .location(descriptor.getLocation())
        .build();
  }

}
TOP

Related Classes of com.cloudera.cdk.data.hbase.HBaseMetadataProvider

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.