Package com.cloudera.cdk.data

Examples of com.cloudera.cdk.data.PartitionStrategy$Builder


    } catch (IOException e) {
      throw new DatasetException("Unable to locate or create dataset partition directory " + partitionDirectory, e);
    }

    int partitionDepth = key.getLength();
    PartitionStrategy subpartitionStrategy = Accessor.getDefault()
        .getSubpartitionStrategy(partitionStrategy, partitionDepth);

    return new FileSystemDataset.Builder()
        .name(name)
        .fileSystem(fileSystem)
View Full Code Here


    }

    for (FileStatus stat : fileStatuses) {
      Path p = fileSystem.makeQualified(stat.getPath());
      PartitionKey key = fromDirectoryName(p);
      PartitionStrategy subPartitionStrategy = Accessor.getDefault()
          .getSubpartitionStrategy(partitionStrategy, 1);
      Builder builder = new FileSystemDataset.Builder()
          .name(name)
          .fileSystem(fileSystem)
          .descriptor(new DatasetDescriptor.Builder(descriptor)
View Full Code Here

          "root directory to dataset (directory: %s).", partitionUri, directoryUri));
    }

    Iterable<String> parts = Splitter.on('/').split(relativizedUri.getPath());

    PartitionStrategy partitionStrategy = dataset.getDescriptor().getPartitionStrategy();
    List<FieldPartitioner> fieldPartitioners = partitionStrategy.getFieldPartitioners();
    if (Iterables.size(parts) > fieldPartitioners.size()) {
      throw new IllegalArgumentException(String.format("Too many partition directories " +
          "for %s (%s), expecting %s.", partitionUri, Iterables.size(parts),
          fieldPartitioners.size()));
    }
View Full Code Here

    this.testProvider = new FileSystemMetadataProvider.Builder().configuration(conf)
        .rootDirectory(testDirectory).build();
    this.repo = new FileSystemDatasetRepository.Builder().configuration(conf)
        .metadataProvider(testProvider).build();

    PartitionStrategy partitionStrategy = new PartitionStrategy.Builder()
        .hash("username", 2).build();
    FileSystemDataset<Object> users = (FileSystemDataset<Object>) repo.create(
        "users",
        new DatasetDescriptor.Builder()
            .schema(USER_SCHEMA)
View Full Code Here

  }

  @Test
  @SuppressWarnings("unchecked")
  public void testFromKey() {
    PartitionStrategy strategy = new PartitionStrategy.Builder()
        .year("timestamp")
        .month("timestamp")
        .day("timestamp")
        .build();
View Full Code Here

  }

  @Test
  @SuppressWarnings("unchecked")
  public void testToKey() {
    PartitionStrategy strategy = new PartitionStrategy.Builder()
        .year("timestamp")
        .month("timestamp")
        .day("timestamp")
        .build();
View Full Code Here

    // convert the schema to Hive columns
    table.setFields(convertSchema(descriptor.getSchema()));

    // copy partitioning info
    if (descriptor.isPartitioned()) {
      PartitionStrategy ps = descriptor.getPartitionStrategy();
      table.setProperty(PARTITION_EXPRESSION_PROPERTY_NAME,
          Accessor.getDefault().toExpression(ps));
      List<FieldSchema> partCols = Lists.newArrayList();
      for (FieldPartitioner fp : ps.getFieldPartitioners()) {
        partCols.add(new FieldSchema(fp.getName(), getHiveType(fp.getType()),
            "Partition column derived from '" + fp.getSourceName() + "' column, " +
                "generated by CDK."));
      }
      table.setPartCols(partCols);
View Full Code Here

    return name.substring(name.indexOf('.') + 1);
  }

  private static DatasetDescriptor getDatasetDescriptor(String schemaString) {
    AvroKeyEntitySchemaParser parser = new AvroKeyEntitySchemaParser();
    PartitionStrategy partitionStrategy = parser.parseKeySchema(schemaString)
        .getPartitionStrategy();
    return new DatasetDescriptor.Builder()
        .schemaLiteral(schemaString)
        .partitionStrategy(partitionStrategy)
        .build();
View Full Code Here

  }

  // TODO: move the logic of parsing keys to DatasetDescriptor itself
  private static DatasetDescriptor withPartitionStrategy(DatasetDescriptor descriptor) {
    AvroKeyEntitySchemaParser parser = new AvroKeyEntitySchemaParser();
    PartitionStrategy partitionStrategy = parser.parseKeySchema(descriptor.getSchema().toString())
        .getPartitionStrategy();
    return new DatasetDescriptor.Builder()
        .schema(descriptor.getSchema())
        .partitionStrategy(partitionStrategy)
        .location(descriptor.getLocation())
View Full Code Here

  private static final Logger logger = LoggerFactory
      .getLogger(TestPartitionStrategy.class);

  @Test
  public void test() throws Exception {
    final PartitionStrategy p = new PartitionStrategy.Builder()
        .identity("month", Integer.class, 12)
        .hash("userId", 7)
        .build();

    List<FieldPartitioner> fieldPartitioners = p.getFieldPartitioners();
    Assert.assertEquals(2, fieldPartitioners.size());

    FieldPartitioner fp0 = fieldPartitioners.get(0);
    assertEquals("month", fp0.getName());
    assertEquals(12, fp0.getCardinality());

    FieldPartitioner fp1 = fieldPartitioners.get(1);
    assertEquals("userId", fp1.getName());
    assertEquals(7, fp1.getCardinality());

    assertEquals(12 * 7, p.getCardinality()); // useful for writers
  }
View Full Code Here

TOP

Related Classes of com.cloudera.cdk.data.PartitionStrategy$Builder

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.