Package org.kitesdk.data

Examples of org.kitesdk.data.DatasetDescriptor


    // Load ("ns", "external") through `managed` and check that it yields the
    // same descriptor as loading the same namespace/name through `external`.
    Dataset<GenericData.Record> dataset = managed.load("ns", "external");
    Assert.assertNotNull("Should open external dataset with managed", dataset);
    Assert.assertEquals("Should match external dataset",
        external.load("ns", "external").getDescriptor(), dataset.getDescriptor());

    // Derive a descriptor from the loaded one, setting the
    // kite.writer.cache-size property to "34" and supplying the schema
    // literal "string" (a JSON string, hence the escaped quotes).
    DatasetDescriptor updatedDescriptor =
        new DatasetDescriptor.Builder(dataset.getDescriptor())
            .property("kite.writer.cache-size", "34")
            .schemaLiteral("\"string\"")
            .build();
View Full Code Here


    // Load ("default", "managed") through `external` and check that it yields
    // the same descriptor as loading the same namespace/name through `managed`.
    Dataset<GenericData.Record> dataset = external.load("default", "managed");
    Assert.assertNotNull("Should open managed dataset with external", dataset);
    Assert.assertEquals("Should match managed dataset",
        managed.load("default", "managed").getDescriptor(), dataset.getDescriptor());

    // Derive a descriptor from the loaded one, setting the
    // kite.writer.cache-size property to "34" and supplying the schema
    // literal "string" (a JSON string, hence the escaped quotes).
    DatasetDescriptor updatedDescriptor =
        new DatasetDescriptor.Builder(dataset.getDescriptor())
            .property("kite.writer.cache-size", "34")
            .schemaLiteral("\"string\"")
            .build();
View Full Code Here

  @Test
  public void testCreateAssignsCorrectLocation() {
    ensureCreated();

    DatasetDescriptor loaded = provider.load(NAMESPACE, NAME);
    Path assignedPath = new Path(loaded.getLocation().getPath());
    Assert.assertEquals("Path should be in the test directory",
        new Path(testDirectory, new Path(NAMESPACE, NAME)), assignedPath);
  }
View Full Code Here

        new Path(testDirectory, new Path(NAMESPACE, NAME)), assignedPath);
  }

  @Test
  public void testRejectsDuplicatePartitionNames() {
    final DatasetDescriptor descriptor = new DatasetDescriptor.Builder(testDescriptor)
        .partitionStrategy(new PartitionStrategy.Builder()
            .identity("timestamp", "timestamp")
            .build())
        .build();
    TestHelpers.assertThrows("Should reject duplicate field and partition name",
View Full Code Here

            "Invalid partition <field:type>: " + partition);
      }
    }

    // building the descriptor validates the schema and strategy
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .partitionStrategy(strategyBuilder.build())
        .schema(open(avroSchemaFile))
        .build();

    String strategy = descriptor.getPartitionStrategy().toString(!minimize);

    output(strategy, console, outputPath);

    return 0;
  }
View Full Code Here

            Iterators.getNext(parts, null),
            Iterators.getNext(parts, null));
      }
    }

    DatasetDescriptor descriptor = descriptorBuilder.build();

    if (isDataUri(dataset)) {
      Datasets.<GenericData.Record, Dataset<GenericData.Record>> update(dataset, descriptor, GenericData.Record.class);
    } else {
      getDatasetRepository().update(namespace, dataset, descriptor);
View Full Code Here

    // this used to be a relative external URI, but is now a managed URI
    String uri = "dataset:hive:data/ds";

    DatasetRepository repo = DatasetRepositories
        .repositoryFor("repo:hive:/tmp/data");
    DatasetDescriptor withLocation = new DatasetDescriptor.Builder(DESCRIPTOR)
        .location("file:/tmp/data/ds") // old location
        .build();
    Dataset<GenericRecord> expected = repo.create(
        "default", "ds", withLocation, GenericRecord.class);
View Full Code Here

    // this used to be a relative external URI, but is now a managed URI
    String uri = "dataset:hive:ds";

    DatasetRepository repo = DatasetRepositories
        .repositoryFor("repo:hive:/tmp/data");
    DatasetDescriptor withLocation = new DatasetDescriptor.Builder(DESCRIPTOR)
        .location("file:/tmp/data/ds") // old location
        .build();
    Dataset<GenericRecord> expected = repo.create(
        "default", "ds", withLocation, GenericRecord.class);
View Full Code Here

    // this used to be a relative external URI, but is now a managed URI
    String uri = "dataset:hive:/data/ds";

    DatasetRepository repo = DatasetRepositories
        .repositoryFor("repo:hive:/tmp/data");
    DatasetDescriptor withLocation = new DatasetDescriptor.Builder(DESCRIPTOR)
        .location("file:/tmp/data/ds") // old location
        .build();
    Dataset<GenericRecord> expected = repo.create(
        "default", "ds", withLocation, GenericRecord.class);
View Full Code Here

    // this used to be a relative external URI, but is now a managed URI
    String uri = "dataset:hive:/ds";

    DatasetRepository repo = DatasetRepositories
        .repositoryFor("repo:hive:/tmp/data");
    DatasetDescriptor withLocation = new DatasetDescriptor.Builder(DESCRIPTOR)
        .location("file:/tmp/data/ds") // old location
        .build();
    Dataset<GenericRecord> expected = repo.create(
        "default", "ds", withLocation, GenericRecord.class);
View Full Code Here

TOP

Related Classes of org.kitesdk.data.DatasetDescriptor

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.