Examples of DatasetDescriptor


Examples of org.kitesdk.data.DatasetDescriptor

    Assert.assertFalse(reader.hasNext());
  }

  @Test
  public void testCustomGenericRecords() {
    final DatasetDescriptor desc = new DatasetDescriptor.Builder()
        .schema(SCHEMA)
        .build();
    final CSVFileReader<TestGenericRecord> reader =
        new CSVFileReader<TestGenericRecord>(localfs, csvFile, desc,
        DataModelUtil.accessor(TestGenericRecord.class, desc.getSchema()));

    reader.initialize();
    Assert.assertTrue(reader.hasNext());
    TestGenericRecord record = reader.next();
    Assert.assertEquals("str", record.get(0));
View Full Code Here

Examples of org.kitesdk.data.DatasetDescriptor

    command.datasets = Lists.newArrayList("users");
    command.avroSchemaFile = avroSchemaFile.toString();
    command.run();

    DatasetDescriptor updated = new DatasetDescriptor.Builder(original)
        .schemaLiteral(schema2)
        .build();

    verify(repo).load("default", "users"); // need to load the current dataset
    verify(ds).getDescriptor(); // should inspect and use its descriptor
View Full Code Here

Examples of org.kitesdk.data.DatasetDescriptor

        "new.property=1234",
        "prop=with=equals"
    );
    command.run();

    DatasetDescriptor updated = new DatasetDescriptor.Builder(original)
        .property("new.property", "1234")
        .property("prop", "with=equals")
        .build();

    verify(repo).load("default", "users"); // need to load the current dataset
View Full Code Here

Examples of org.kitesdk.data.DatasetDescriptor

    }
  }

  @Test
  public void testDescriptorValidationPasses() {
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schema(schema)
        .partitionStrategy(strategy)
        .build();
    Assert.assertEquals("Descriptor should have correct schema",
        schema, descriptor.getSchema());
    Assert.assertEquals("Descriptor should have correct strategy",
        strategy, descriptor.getPartitionStrategy());
  }
View Full Code Here

Examples of org.kitesdk.data.DatasetDescriptor

  // This class is Immutable and must be thread-safe
  protected final ThreadLocal<StorageKey> keys;

  protected AbstractRefinableView(Dataset<E> dataset, Class<E> type) {
    this.dataset = dataset;
    final DatasetDescriptor descriptor = dataset.getDescriptor();
    if (descriptor.isPartitioned()) {
      this.constraints = new Constraints(
          descriptor.getSchema(), descriptor.getPartitionStrategy());
      this.comparator = new MarkerComparator(descriptor.getPartitionStrategy());
      this.keys = new ThreadLocal<StorageKey>() {
        @Override
        protected StorageKey initialValue() {
          return new StorageKey(descriptor.getPartitionStrategy());
        }
      };
    } else {
      this.constraints = new Constraints(descriptor.getSchema());
      this.comparator = null;
      this.keys = null;
    }
    this.accessor = DataModelUtil.accessor(type, descriptor.getSchema());
    this.entityTest = constraints.toEntityPredicate(accessor);
  }
View Full Code Here

Examples of org.kitesdk.data.DatasetDescriptor

  public void testRefineIdentity() throws Exception {
      PartitionStrategy strategy = new PartitionStrategy.Builder()
              .identity("user_id")
              .build();

      DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
              .schemaUri("resource:standard_event.avsc")
              .partitionStrategy(strategy)
              .build();

      // Create a separate dataset to avoid conflicts with the above.
View Full Code Here

Examples of org.kitesdk.data.DatasetDescriptor

        .requiredString("email")
        .endRecord();
    PartitionStrategy strategy = new PartitionStrategy.Builder()
        .provided("version", "int")
        .build();
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schema(user)
        .partitionStrategy(strategy)
        .build();

    Path datasetPath = new Path("file:" + testDirectory + "/provided/users");
View Full Code Here

Examples of org.kitesdk.data.DatasetDescriptor

  @Test
  public void testCreate() {
    Assert.assertFalse("Sanity check", provider.exists(NAMESPACE, NAME));

    DatasetDescriptor created = provider.create(NAMESPACE, NAME, testDescriptor);

    Assert.assertNotNull("Descriptor should be returned", created);
    Assert.assertTrue("Descriptor should exist", provider.exists(NAMESPACE, NAME));
    Assert.assertEquals("Schema should match",
        testDescriptor.getSchema(), created.getSchema());
    Assert.assertEquals("PartitionStrategy should match",
        testDescriptor.getPartitionStrategy(), created.getPartitionStrategy());
    Assert.assertEquals("Format should match",
        testDescriptor.getFormat(), created.getFormat());
    // the MetadataProvider optionally sets the location, nothing to test
  }
View Full Code Here

Examples of org.kitesdk.data.DatasetDescriptor

  public void testCreateWithLocation() throws URISyntaxException {
    Assert.assertFalse("Sanity check", provider.exists(NAMESPACE, NAME));

    String auth = getDFS().getUri().getAuthority();
    URI requestedLocation = new URI("hdfs://" + auth + "/tmp/data/my_data_set");
    DatasetDescriptor requested = new DatasetDescriptor.Builder(testDescriptor)
        .location(requestedLocation)
        .build();

    final DatasetDescriptor created;
    try {
      created = provider.create(NAMESPACE, NAME, requested);
    } catch (UnsupportedOperationException ex) {
      // this is expected if the provider doesn't support requested locations
      return;
    }

    // if supported, the location should be unchanged.
    Assert.assertNotNull("Descriptor should be returned", created);
    Assert.assertTrue("Descriptor should exist", provider.exists(NAMESPACE, NAME));
    Assert.assertEquals("Requested locations should match",
        requestedLocation, created.getLocation());
  }
View Full Code Here

Examples of org.kitesdk.data.DatasetDescriptor

  @Test
  public void testLoadSetsLocation() throws IOException {
    ensureCreated();

    DatasetDescriptor loaded = provider.load(NAMESPACE, NAME);
    Assert.assertNotNull("Loaded descriptor should have a location",
        loaded.getLocation());
    if (distributed) {
      // purposely call new Configuration() to test that the URI has HDFS info
      Assert.assertEquals(
          getDFS(),
          FileSystem.get(loaded.getLocation(), new Configuration()));
      Assert.assertEquals(
          "hdfs",
          loaded.getLocation().getScheme());
      Assert.assertEquals(
          getDFS().getUri().getAuthority(),
          loaded.getLocation().getAuthority());
    } else {
      // purposely call new Configuration() to test that the URI has FS info
      Assert.assertEquals(
          getFS(),
          FileSystem.get(loaded.getLocation(), new Configuration()));
      Assert.assertEquals(
          "file",
          loaded.getLocation().getScheme());
      Assert.assertEquals(
          getFS().getUri().getAuthority(),
          loaded.getLocation().getAuthority());
    }
  }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.