Package com.cloudera.cdk.data

Examples of com.cloudera.cdk.data.DatasetRepository.create()


    // Create the dataset
    DatasetRepository repo = DatasetRepositories.open(datasetUri);
    DatasetDescriptor datasetDescriptor = new DatasetDescriptor.Builder()
        .schema(schema).build();
    Dataset<Object> dataset = repo.create(datasetName, datasetDescriptor);

    // Run the job
    final String schemaString = schema.toString();
    AvroType<GenericData.Record> outputType = Avros.generics(schema);
    PCollection<String> lines = readTextFile(input);
View Full Code Here


    if (partitionExpression != null) {
      descriptorBuilder.partitionStrategy(Accessor.getDefault().fromExpression(partitionExpression));
    }

    repo.create(datasetName, descriptorBuilder.build());
  }

}
View Full Code Here

    // Create a dataset of users with the Avro schema in the repository
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schemaUri("resource:user.avsc")
        .build();
    Dataset<GenericRecord> users = repo.create("users", descriptor);

    // Get a writer for the dataset and write some users to it
    DatasetWriter<GenericRecord> writer = users.newWriter();
    try {
      writer.open();
View Full Code Here

    // Create a dataset of users with the Avro schema in the repository
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schemaUri("resource:user.avsc")
        .build();
    Dataset<GenericRecord> users = repo.create("users", descriptor);

    // Get a writer for the dataset and write some users to it
    DatasetWriter<GenericRecord> writer = users.newWriter();
    try {
      writer.open();
View Full Code Here

    // Create a dataset of users with the Avro schema in the repository
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schemaUri("resource:user.avsc")
        .partitionStrategy(partitionStrategy)
        .build();
    Dataset<GenericRecord> users = repo.create("users", descriptor);

    // Get a writer for the dataset and write some users to it
    DatasetWriter<GenericRecord> writer = users.newWriter();
    try {
      writer.open();
View Full Code Here

    // repository
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schemaUri("resource:user.avsc")
        .format(Formats.PARQUET)
        .build();
    Dataset<GenericRecord> users = repo.create("users", descriptor);

    // Get a writer for the dataset and write some users to it
    DatasetWriter<GenericRecord> writer = users.newWriter();
    try {
      writer.open();
View Full Code Here

    DatasetRepository repo = DatasetRepositories.open("repo:file:/tmp/hello-cdk");

    // Create a dataset of Hellos
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schema(Hello.class).build();
    Dataset<Hello> hellos = repo.create("hellos", descriptor);

    // Write some Hellos into the dataset
    DatasetWriter<Hello> writer = hellos.newWriter();
    try {
      writer.open();
View Full Code Here

    // Create a dataset of products with the Avro schema in the repository
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schema(Product.class)
        .build();
    Dataset<Product> products = repo.create("products", descriptor);

    // Get a writer for the dataset and write some products to it
    DatasetWriter<Product> writer = products.newWriter();
    try {
      writer.open();
View Full Code Here

    // where the schema is stored
    URI schemaURI = URI.create("resource:simple-log.avsc");

    // create a Parquet dataset for long-term storage
    repo.create("logs", new DatasetDescriptor.Builder()
        .format(Formats.PARQUET)
        .schemaUri(schemaURI)
        .partitionStrategy(new PartitionStrategy.Builder()
            .year("timestamp", "year")
            .month("timestamp", "month")
View Full Code Here

            .day("timestamp", "day")
            .build())
        .build());

    // create an Avro dataset to temporarily hold data
    repo.create("logs-staging", new DatasetDescriptor.Builder()
        .format(Formats.AVRO)
        .schemaUri(schemaURI)
        .partitionStrategy(new PartitionStrategy.Builder()
            .day("timestamp", "day")
            .build())
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.