Package org.kitesdk.data

Examples of org.kitesdk.data.DatasetDescriptor


        Datasets.delete("dataset:hive:/tmp/datasets/test"));
    Assert.assertFalse("Delete should return false if there is no dataset",
        Datasets.delete("dataset:hive:/tmp/datasets/test"));

    // recreate the default.test dataset, but with a different storage location
    DatasetDescriptor doNotDelete = new DatasetDescriptor.Builder(descriptor)
        .location(URI.create("file:/tmp/datasets/default/test"))
        .build();
    metastore.createTable(HiveUtils.tableForDescriptor(
        "default", "test", doNotDelete, true));
View Full Code Here


public class AvroKeyEntitySchemaParser implements
    KeyEntitySchemaParser<AvroKeySchema, AvroEntitySchema> {

  @Override
  public AvroKeySchema parseKeySchema(String rawSchema) {
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schemaLiteral(rawSchema)
        .build();
    return new AvroKeySchema(
        descriptor.getSchema(), descriptor.getPartitionStrategy());
  }
View Full Code Here

    Dataset<GenericRecord> inputDataset = repo.create("default", "in",
        new DatasetDescriptor.Builder()
        .schemaLiteral(testGenericEntity).build());

    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schemaLiteral(testGenericEntity)
        .build();
    Dataset<GenericRecord> outputDataset = repo.create("default", datasetName, descriptor);

    DatasetWriter<GenericRecord> writer = inputDataset.newWriter();
View Full Code Here

    Dataset<GenericRecord> inputDataset = repo.create("default", "in",
        new DatasetDescriptor.Builder()
            .schemaLiteral(testGenericEntity).build());

    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schemaLiteral(testGenericEntity)
        .build();
    Dataset<GenericRecord> outputDataset = repo.create("default", datasetName, descriptor);

    DatasetKeyInputFormat.configure(job).readFrom(inputDataset);
View Full Code Here

  @Override
  public AvroKeySchema parseKeySchema(String rawSchema,
      PartitionStrategy partitionStrategy) {
    // use DatasetDescriptor.Builder because it checks consistency
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schemaLiteral(rawSchema)
        .partitionStrategy(partitionStrategy)
        .build();
    return new AvroKeySchema(
        descriptor.getSchema(), descriptor.getPartitionStrategy());
  }
View Full Code Here

        descriptor.getSchema(), descriptor.getPartitionStrategy());
  }

  @Override
  public AvroEntitySchema parseEntitySchema(String rawSchema) {
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schemaLiteral(rawSchema)
        .build();
    return new AvroEntitySchema(
        descriptor.getSchema(), rawSchema, descriptor.getColumnMapping());
  }
View Full Code Here

  @Override
  public AvroEntitySchema parseEntitySchema(String rawSchema,
      ColumnMapping columnMapping) {
    // use DatasetDescriptor.Builder because it checks consistency
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schemaLiteral(rawSchema)
        .columnMapping(columnMapping)
        .build();
    return new AvroEntitySchema(
        descriptor.getSchema(), rawSchema, descriptor.getColumnMapping());
  }
View Full Code Here

  public boolean delete(String namespace, String name) {
    Preconditions.checkArgument(DEFAULT_NAMESPACE.equals(namespace),
        "Non-default namespaces are not supported");
    Preconditions.checkNotNull(name, "Dataset name cannot be null");

    DatasetDescriptor descriptor;
    try {
      descriptor = load(namespace, name);
    } catch (DatasetNotFoundException e) {
      return false;
    }
    Preconditions.checkState(descriptor.isColumnMapped(),
        "[BUG] Existing descriptor has no column mapping");

    String tableName = getTableName(name);
    String entityName = getEntityName(name);

    schemaManager.deleteSchema(tableName, entityName);

    // TODO: this may delete columns for other entities if they share column families
    // TODO: https://issues.cloudera.org/browse/CDK-145, https://issues.cloudera.org/browse/CDK-146
    for (String columnFamily : descriptor.getColumnMapping().getRequiredColumnFamilies()) {
      try {
        hbaseAdmin.disableTable(tableName);
        try {
          hbaseAdmin.deleteColumn(tableName, columnFamily);
        } finally {
View Full Code Here

      if (resolved.equals(namespace)) {
        // the requested dataset already exists
        throw new DatasetExistsException(
            "Metadata already exists for dataset: " + namespace + "." + name);
      } else {
        DatasetDescriptor loaded = load(resolved, name);
        // replacing old default.name table
        LOG.warn("Creating table managed table {}.{}: replaces default.{}",
            new Object[]{namespace, name, name});
        // validate that the new metadata can read the existing data
        Compatibility.checkUpdate(loaded, descriptor);
        // if the table in the default namespace matches, then the location is
        // either null (and should be set to the existing) or matches. either
        // way, use the loaded location.
        location = loaded.getLocation();
      }
    }

    LOG.info("Creating a managed Hive table named: " + name);

    boolean isExternal = (location != null);

    DatasetDescriptor toCreate = descriptor;
    if (isExternal) {
      // add the location to the descriptor that will be used
      toCreate = new DatasetDescriptor.Builder(descriptor)
          .location(location)
          .build();
    }

    // construct the table metadata from a descriptor
    Table table = HiveUtils.tableForDescriptor(
        namespace, name, toCreate, isExternal);

    // create it
    getMetaStoreUtil().createTable(table);

    // load the created table to get the final data location
    Table newTable = getMetaStoreUtil().getTable(namespace, name);

    DatasetDescriptor newDescriptor = new DatasetDescriptor.Builder(descriptor)
        .location(newTable.getSd().getLocation())
        .build();

    if (isExternal) {
      FileSystemUtil.ensureLocationExists(newDescriptor, conf);
View Full Code Here

      }
    }

    LOG.info("Creating an external Hive table: {}.{}", namespace, name);

    DatasetDescriptor newDescriptor = descriptor;

    if (descriptor.getLocation() == null) {
      // create a new descriptor with the dataset's location
      newDescriptor = new DatasetDescriptor.Builder(descriptor)
          .location(pathForDataset(namespace, name))
View Full Code Here

TOP

Related Classes of org.kitesdk.data.DatasetDescriptor

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.