Package: com.cloudera.cdk.data

Examples of com.cloudera.cdk.data.DatasetDescriptor


    Schema schema = new Schema.Parser().parse(
        Resources.getResource("combined_log_format.avsc").openStream());

    // Create the dataset
    DatasetRepository repo = DatasetRepositories.open(datasetUri);
    DatasetDescriptor datasetDescriptor = new DatasetDescriptor.Builder()
        .schema(schema).build();
    Dataset<Object> dataset = repo.create(datasetName, datasetDescriptor);

    // Run the job
    final String schemaString = schema.toString();
View Full Code Here


    HBaseTestUtils.util.deleteTable(Bytes.toBytes(managedTableName));
    HBaseDatasetRepository repo = new HBaseDatasetRepository.Builder()
        .configuration(HBaseTestUtils.getConf()).build();
    String testGenericEntity = AvroUtils.inputStreamToString(
        HBaseDatasetRepositoryTest.class.getResourceAsStream("/TestGenericEntity.avsc"));
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schemaLiteral(testGenericEntity)
        .build();
    dataset = repo.create("testtable", descriptor);
    for (int i = 0; i < 10; i++) {
      dataset.put(HBaseDatasetRepositoryTest.createGenericEntity(i));
View Full Code Here

  @Test
  public void testLoadSetsURIs() throws IOException {
    ensureCreated();

    DatasetDescriptor loaded = provider.load(NAME);
    Assert.assertNotNull("Loaded descriptor should have a location",
        loaded.getLocation());
    if (distributed) {
      // purposely call new Configuration() to test that the URI has HDFS info
      Assert.assertEquals(
          getDFS(),
          FileSystem.get(loaded.getLocation(), new Configuration()));
      Assert.assertEquals(
          "hdfs",
          loaded.getLocation().getScheme());
      Assert.assertEquals(
          getDFS().getUri().getAuthority(),
          loaded.getLocation().getAuthority());
    } else {
      // purposely call new Configuration() to test that the URI has FS info
      Assert.assertEquals(
          getFS(),
          FileSystem.get(loaded.getLocation(), new Configuration()));
      Assert.assertEquals(
          "file",
          loaded.getLocation().getScheme());
      Assert.assertEquals(
          getFS().getUri().getAuthority(),
          loaded.getLocation().getAuthority());
    }
  }
View Full Code Here

    }
  }

  @Test
  public void testCreateSetsURIs() throws IOException {
    DatasetDescriptor created = provider.create(NAME, testDescriptor);
    Assert.assertNotNull("Created descriptor should have a location",
        created.getLocation());
    if (distributed) {
      Assert.assertEquals(
          "hdfs",
          created.getLocation().getScheme());
      Assert.assertEquals(
          getDFS().getUri().getAuthority(),
          created.getLocation().getAuthority());
    } else {
      Assert.assertEquals(
          "file",
          created.getLocation().getScheme());
      Assert.assertEquals(
          getFS().getUri().getAuthority(),
          created.getLocation().getAuthority());
    }
  }
View Full Code Here

  @Test
  public void testDeleteRemovesMetadataFiles() throws IOException {
    testCreateMetadataFiles();

    DatasetDescriptor loaded = provider.load(NAME);

    Path namedDirectory = new Path(loaded.getLocation());
    Path metadataDirectory = new Path(namedDirectory, ".metadata");
    Path propertiesFile = new Path(metadataDirectory, "descriptor.properties");
    Path schemaFile = new Path(metadataDirectory, "schema.avsc");

    boolean result = provider.delete(NAME);
View Full Code Here

  private final StorageKey reusedKey;

  private ReaderWriterState state;

  public PartitionedDatasetWriter(FileSystemView<E> view) {
    final DatasetDescriptor descriptor = view.getDataset().getDescriptor();
    Preconditions.checkArgument(descriptor.isPartitioned(),
        "Dataset " + view.getDataset() + " is not partitioned");

    this.view = view;
    this.partitionStrategy = descriptor.getPartitionStrategy();
    this.maxWriters = Math.min(10, partitionStrategy.getCardinality());
    this.state = ReaderWriterState.NEW;
    this.reusedKey = new StorageKey(partitionStrategy);
  }
View Full Code Here

  }

  @Test
  @SuppressWarnings("unchecked")
  public void testNotPartitioned() throws Exception {
    final DatasetDescriptor flatDescriptor = new DatasetDescriptor
        .Builder(testDescriptor).partitionStrategy(null).build();
    final Dataset<StandardEvent> flatDataset =
        repo.create("flat", flatDescriptor);
    final RangeView<StandardEvent> notPartitioned = (RangeView<StandardEvent>)
        flatDataset;
View Full Code Here

    out.close();
  }

  @Override
  public DatasetReader<GenericData.Record> newReader() throws IOException {
    final DatasetDescriptor desc = new DatasetDescriptor.Builder()
        .property("cdk.csv.lines-to-skip", "1")
        .schema(VALIDATOR_SCHEMA)
        .build();
    return new CSVFileReader<GenericData.Record>(localfs, validatorFile, desc);
  }
View Full Code Here

    };
  }

  @Test(expected = IllegalArgumentException.class)
  public void testRejectsNonRecordSchemas() {
    final DatasetDescriptor desc = new DatasetDescriptor.Builder()
        .schema(SchemaBuilder.array().items().stringType())
        .build();
    new CSVFileReader(localfs, csvFile, desc);
  }
View Full Code Here

    new CSVFileReader(localfs, csvFile, desc);
  }

  @Test
  public void testStringSchema() {
    final DatasetDescriptor desc = new DatasetDescriptor.Builder()
        .schema(STRINGS)
        .build();
    final CSVFileReader<GenericData.Record> reader =
        new CSVFileReader<GenericData.Record>(localfs, csvFile, desc);
View Full Code Here

TOP

Related Classes of com.cloudera.cdk.data.DatasetDescriptor

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact: coftware@gmail.com.