Package com.cloudera.cdk.data

Examples of com.cloudera.cdk.data.DatasetRepository


    Schema schema = new Schema.Parser().parse(
        Resources.getResource("combined_log_format.avsc").openStream());

    // Create the dataset
    DatasetRepository repo = DatasetRepositories.open(datasetUri);
    DatasetDescriptor datasetDescriptor = new DatasetDescriptor.Builder()
        .schema(schema).build();
    Dataset<Object> dataset = repo.create(datasetName, datasetDescriptor);

    // Run the job
    final String schemaString = schema.toString();
    AvroType<GenericData.Record> outputType = Avros.generics(schema);
    PCollection<String> lines = readTextFile(input);
View Full Code Here


      Object message) {
    if (!initialized) {
      // initialize here rather than in activateOptions to avoid initialization
      // cycle in Configuration and log4j
      try {
        DatasetRepository repo = DatasetRepositories.open(datasetRepositoryUri);
        Dataset dataset = repo.load(datasetName);
        if (dataset.getDescriptor().isPartitioned()) {
          partitionStrategy = dataset.getDescriptor().getPartitionStrategy();
        }
        URL schemaUrl = dataset.getDescriptor().getSchemaUrl();
        if (schemaUrl != null) {
View Full Code Here

    new Loader().load();
  }

  @Test
  public void testLocalRelative() throws URISyntaxException {
    DatasetRepository repository = DatasetRepositories.open(new URI("repo:file:target/dsr-repo-test"));

    // We only do the deeper implementation checks once per combination.
    Assert.assertNotNull("Received a repository", repository);
    Assert.assertTrue("Repo is a FileSystem repo",
        repository instanceof FileSystemDatasetRepository);
View Full Code Here

        expected, fsProvider.getRootDirectory());
  }

  @Test
  public void testLocalAbsolute() throws URISyntaxException {
    DatasetRepository repository = DatasetRepositories.open(new URI("repo:file:/tmp/dsr-repo-test"));

    FileSystemMetadataProvider provider = (FileSystemMetadataProvider)
        ((FileSystemDatasetRepository) repository).getMetadataProvider();
    Assert.assertEquals("Root directory should be the correct qualified path",
        new Path("file:/tmp/dsr-repo-test"), provider.getRootDirectory());
View Full Code Here

  }

  @Test
  public void testHdfsAbsolute() throws URISyntaxException {
    URI hdfsUri = getDFS().getUri();
    DatasetRepository repository = DatasetRepositories.open(
        new URI("repo:hdfs://" + hdfsUri.getAuthority() + "/tmp/dsr-repo-test"));

    // We only do the deeper implementation checks once per combination.
    Assert.assertNotNull("Received a repository", repository);
    Assert.assertTrue("Repo is a FileSystem repo",
View Full Code Here

    new com.cloudera.cdk.data.hcatalog.impl.Loader().load();
  }

  @Test
  public void testManagedURI() {
    DatasetRepository repo = DatasetRepositories.open("repo:hive");

    Assert.assertNotNull("Received a repository", repo);
    Assert.assertTrue("Repo should be a HCatalogDatasetRepository",
        repo instanceof HCatalogDatasetRepository);
    MetadataProvider provider = ((HCatalogDatasetRepository) repo)
View Full Code Here

  }

  @Test
  public void testManagedURIWithRootPath() {
    // URIs with "/" as the path should open managed repositories
    DatasetRepository repo = DatasetRepositories.open(
        "repo:hive:/");

    Assert.assertNotNull("Received a repository", repo);
    Assert.assertTrue("Repo should be a HCatalogDatasetRepository",
        repo instanceof HCatalogDatasetRepository);
View Full Code Here

  }

  @Test
  public void testExternalURI() {
    URI hdfsUri = getDFS().getUri();
    DatasetRepository repo = DatasetRepositories.open(
        "repo:hive:/tmp/hive-repo?hdfs-host=" + hdfsUri.getHost() +
        "&hdfs-port=" + hdfsUri.getPort());

    Assert.assertNotNull("Received a repository", repo);
    org.junit.Assert.assertTrue("Repo is a FileSystem repo",
View Full Code Here

  @Parameter(property = "cdk.datasetName", required = true)
  private String datasetName;

  @Override
  public void execute() throws MojoExecutionException, MojoFailureException {
    DatasetRepository repo = getDatasetRepository();
    repo.delete(datasetName);
  }
View Full Code Here

    if (avroSchemaFile == null && avroSchemaReflectClass == null) {
      throw new IllegalArgumentException("One of cdk.avroSchemaFile or " +
          "cdk.avroSchemaReflectClass must be specified");
    }

    DatasetRepository repo = getDatasetRepository();

    DatasetDescriptor descriptor = repo.load(datasetName).getDescriptor();
    DatasetDescriptor.Builder descriptorBuilder =
        new DatasetDescriptor.Builder(descriptor);
    configureSchema(descriptorBuilder, avroSchemaFile, avroSchemaReflectClass);

    repo.update(datasetName, descriptorBuilder.build());
  }
View Full Code Here

TOP

Related Classes of com.cloudera.cdk.data.DatasetRepository

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.