Package com.cloudera.cdk.data

Examples of com.cloudera.cdk.data.PartitionStrategy$Builder


  }

  @Test
  @SuppressWarnings("deprecation")
  public void testPartitionedWriterSingle() throws IOException {
    PartitionStrategy partitionStrategy = new PartitionStrategy.Builder().hash(
      "username", 2).build();

    FileSystemDataset<Record> ds = new FileSystemDataset.Builder()
        .name("partitioned-users")
        .configuration(getConfiguration())
        .descriptor(new DatasetDescriptor.Builder()
            .schema(USER_SCHEMA)
            .format(format)
            .location(testDirectory)
            .partitionStrategy(partitionStrategy)
            .build())
        .build();

    Assert.assertTrue("Dataset is partitioned", ds.getDescriptor()
      .isPartitioned());
    Assert.assertEquals(partitionStrategy, ds.getDescriptor()
      .getPartitionStrategy());

    writeTestUsers(ds, 10);
    Assert.assertTrue("Partitioned directory 0 exists",
      fileSystem.exists(new Path(testDirectory, "username=0")));
    Assert.assertTrue("Partitioned directory 1 exists",
      fileSystem.exists(new Path(testDirectory, "username=1")));
    checkTestUsers(ds, 10);
    PartitionKey key0 = partitionStrategy.partitionKey(0);
    PartitionKey key1 = partitionStrategy.partitionKey(1);
    int total = readTestUsersInPartition(ds, key0, null)
      + readTestUsersInPartition(ds, key1, null);
    Assert.assertEquals(10, total);

    testPartitionKeysAreEqual(ds, key0, key1);
View Full Code Here


  }

  @Test
  @SuppressWarnings("deprecation")
  public void testPartitionedWriterDouble() throws IOException {
    PartitionStrategy partitionStrategy = new PartitionStrategy.Builder()
      .hash("username", 2).hash("email", 3).build();

    FileSystemDataset<Record> ds = new FileSystemDataset.Builder()
        .name("partitioned-users")
        .configuration(getConfiguration())
        .descriptor(new DatasetDescriptor.Builder()
            .schema(USER_SCHEMA)
            .format(format)
            .location(testDirectory)
            .partitionStrategy(partitionStrategy)
            .build())
        .build();

    Assert.assertTrue("Dataset is partitioned", ds.getDescriptor()
      .isPartitioned());
    Assert.assertEquals(partitionStrategy, ds.getDescriptor()
      .getPartitionStrategy());

    writeTestUsers(ds, 10);
    checkTestUsers(ds, 10);
    PartitionKey key0 = Accessor.getDefault().newPartitionKey(0);
    PartitionKey key1 = Accessor.getDefault().newPartitionKey(1);
    int total = readTestUsersInPartition(ds, key0, "email")
      + readTestUsersInPartition(ds, key0, "email");
    Assert.assertEquals(10, total);

    total = 0;
    for (int i1 = 0; i1 < 2; i1++) {
      for (int i2 = 0; i2 < 3; i2++) {
        String part = "username=" + i1 + "/email=" + i2;
        Assert.assertTrue("Partitioned directory " + part + " exists",
          fileSystem.exists(new Path(testDirectory, part)));
        total += readTestUsersInPartition(ds,
          partitionStrategy.partitionKey(i1, i2), null);
      }
    }
    Assert.assertEquals(10, total);

    testPartitionKeysAreEqual(ds, key0, key1);
View Full Code Here

  }

  @Test
  @SuppressWarnings("deprecation")
  public void testGetPartitionReturnsNullIfNoAutoCreate() throws IOException {
    PartitionStrategy partitionStrategy = new PartitionStrategy.Builder().hash(
      "username", 2).build();

    FileSystemDataset<Record> ds = new FileSystemDataset.Builder()
        .name("partitioned-users")
        .configuration(getConfiguration())
        .descriptor(new DatasetDescriptor.Builder()
            .schema(USER_SCHEMA)
            .format(format)
            .location(testDirectory)
            .partitionStrategy(partitionStrategy)
            .build())
        .build();

    Assert
      .assertNull(ds.getPartition(partitionStrategy.partitionKey(1), false));
  }
View Full Code Here

  }

  @Test
  @SuppressWarnings("deprecation")
  public void testWriteToSubpartition() throws IOException {
    PartitionStrategy partitionStrategy = new PartitionStrategy.Builder()
      .hash("username", "username_part", 2).hash("email", 3).build();

    FileSystemDataset<Record> ds = new FileSystemDataset.Builder()
        .name("partitioned-users")
        .configuration(getConfiguration())
View Full Code Here

  }

  @Test
  @SuppressWarnings("deprecation")
  public void testDropPartition() throws IOException {
    PartitionStrategy partitionStrategy = new PartitionStrategy.Builder()
      .hash("username", 2).build();

    FileSystemDataset<Record> ds = new FileSystemDataset.Builder()
        .name("partitioned-users")
        .configuration(getConfiguration())
        .descriptor(new DatasetDescriptor.Builder()
            .schema(USER_SCHEMA)
            .format(format)
            .location(testDirectory)
            .partitionStrategy(partitionStrategy)
            .build())
        .build();

    writeTestUsers(ds, 10);

    Assert.assertTrue(
      fileSystem.isDirectory(new Path(testDirectory, "username=0")));
    Assert.assertTrue(
      fileSystem.isDirectory(new Path(testDirectory, "username=1")));

    ds.dropPartition(partitionStrategy.partitionKey(0));
    Assert.assertFalse(
      fileSystem.isDirectory(new Path(testDirectory, "username=0")));

    ds.dropPartition(partitionStrategy.partitionKey(1));
    Assert.assertFalse(
      fileSystem.isDirectory(new Path(testDirectory, "username=1")));

    DatasetException caught = null;

    try {
      ds.dropPartition(partitionStrategy.partitionKey(0));
    } catch (DatasetException e) {
      caught = e;
    }

    Assert.assertNotNull(caught);
View Full Code Here

  @Test
  public void testPartitionStrategy() {
    String expr = "hash(\"username\", \"username_part\", 2)";
    PartitionExpression expression = new PartitionExpression(expr, true);

    PartitionStrategy strategy = expression.evaluate();
    List<FieldPartitioner> fieldPartitioners = strategy.getFieldPartitioners();
    Assert.assertEquals(1, fieldPartitioners.size());
    FieldPartitioner fp = fieldPartitioners.get(0);
    Assert.assertEquals(HashFieldPartitioner.class, fp.getClass());
    Assert.assertEquals("username", fp.getSourceName());
    Assert.assertEquals("username_part", fp.getName());
View Full Code Here

  public void testSubpartitionStrategy() {
    String expr = "[hash(\"username\", \"username_part\", 2), hash(\"username2\", " +
        "\"username2_part\", 3)]";
    PartitionExpression expression = new PartitionExpression(expr, true);

    PartitionStrategy strategy = expression.evaluate();
    List<FieldPartitioner> fieldPartitioners = strategy.getFieldPartitioners();
    Assert.assertEquals(2, fieldPartitioners.size());

    FieldPartitioner fp0 = fieldPartitioners.get(0);
    Assert.assertEquals(HashFieldPartitioner.class, fp0.getClass());
    Assert.assertEquals("username_part", fp0.getName());
View Full Code Here

  @Test
  public void testMixedSubpartitionStrategy() {
    String expr = "[year(\"timestamp\", \"year\"), minute(\"timestamp\", \"minute\")]";
    PartitionExpression expression = new PartitionExpression(expr, true);

    PartitionStrategy strategy = expression.evaluate();
    List<FieldPartitioner> fieldPartitioners = strategy.getFieldPartitioners();
    Assert.assertEquals(2, fieldPartitioners.size());

    FieldPartitioner fp0 = fieldPartitioners.get(0);
    Assert.assertEquals(YearFieldPartitioner.class, fp0.getClass());
    Assert.assertEquals("timestamp", fp0.getSourceName());
View Full Code Here

        Formats.AVRO, repo.load(NAME).getDescriptor().getFormat());
  }

  @Test
  public void testUpdateFailsWithPartitionStrategyChange() {
    PartitionStrategy ps1 = new PartitionStrategy.Builder()
        .hash("username", 2)
        .build();
    PartitionStrategy ps2 = new PartitionStrategy.Builder()
        .hash("username", 2)
        .hash("email", 3)
        .build();

    Dataset<Record> dataset = repo.create(NAME,
View Full Code Here

  }

  @Test
  @SuppressWarnings("deprecation")
  public void testPartitionedSourceAndTarget() throws IOException {
    PartitionStrategy partitionStrategy = new PartitionStrategy.Builder().hash(
        "username", 2).build();

    Dataset<Record> inputDataset = repo.create("in", new DatasetDescriptor.Builder()
        .schema(USER_SCHEMA).partitionStrategy(partitionStrategy).build());
    Dataset<Record> outputDataset = repo.create("out", new DatasetDescriptor.Builder()
        .schema(USER_SCHEMA).partitionStrategy(partitionStrategy).build());

    writeTestUsers(inputDataset, 10);

    PartitionKey key = partitionStrategy.partitionKey(0);
    Dataset<Record> inputPart0 = inputDataset.getPartition(key, false);
    Dataset<Record> outputPart0 = outputDataset.getPartition(key, true);

    Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class);
    PCollection<GenericData.Record> data = pipeline.read(
View Full Code Here

TOP

Related Classes of com.cloudera.cdk.data.PartitionStrategy$Builder

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.