Examples of DatasetDescriptor


Examples of com.cloudera.cdk.data.DatasetDescriptor

    Schema schema = new Schema.Parser().parse(
        Resources.getResource("combined_log_format.avsc").openStream());

    // Create the dataset
    DatasetRepository repo = DatasetRepositories.open(datasetUri);
    DatasetDescriptor datasetDescriptor = new DatasetDescriptor.Builder()
        .schema(schema).build();
    Dataset<Object> dataset = repo.create(datasetName, datasetDescriptor);

    // Run the job
    final String schemaString = schema.toString();
View Full Code Here

Examples of org.kitesdk.data.DatasetDescriptor

        public Dataset<GenericRecord> run() {
          return Datasets.load(uri);
        }
      });

    DatasetDescriptor descriptor = view.getDataset().getDescriptor();
    String formatName = descriptor.getFormat().getName();
    Preconditions.checkArgument(allowedFormats().contains(formatName),
      "Unsupported format: " + formatName);

    Schema newSchema = descriptor.getSchema();
    if (targetSchema == null || !newSchema.equals(targetSchema)) {
      this.targetSchema = descriptor.getSchema();
      // target dataset schema has changed, invalidate all readers based on it
      readers.invalidateAll();
    }

    this.reuseDatum = !("parquet".equals(formatName));
View Full Code Here

Examples of org.kitesdk.data.DatasetDescriptor

    // run the sink
    sink.start();
    sink.process();

    // update the dataset's schema
    DatasetDescriptor updated = new DatasetDescriptor
      .Builder(Datasets.load(FILE_DATASET_URI).getDataset().getDescriptor())
      .schema(UPDATED_SCHEMA)
      .build();
    Datasets.update(FILE_DATASET_URI, updated);
View Full Code Here

Examples of org.kitesdk.data.DatasetDescriptor

    readWithCallback(targetClass, callback, null);
  }

  @Override
  public <T> Collection<T> read(Class<T> targetClass) {
    DatasetDescriptor descriptor = getDatasetDescriptor(targetClass);
    if (descriptor == null) {
      throw new StoreException("Unable to locate dataset for target class " + targetClass.getName());
    }
    if (Formats.PARQUET.equals(descriptor.getFormat())) {
      return readGenericRecords(targetClass, null);
    } else {
      return readPojo(targetClass, null);
    }
  }
View Full Code Here

Examples of org.kitesdk.data.DatasetDescriptor

    readWithCallback(targetClass, callback, viewCallback);
  }

  @Override
  public <T> Collection<T> read(Class<T> targetClass, ViewCallback viewCallback) {
    DatasetDescriptor descriptor = getDatasetDescriptor(targetClass);
    if (descriptor == null) {
      throw new StoreException("Unable to locate dataset for target class " + targetClass.getName());
    }
    if (Formats.PARQUET.equals(descriptor.getFormat())) {
      return readGenericRecords(targetClass, viewCallback);
    } else {
      return readPojo(targetClass, viewCallback);
    }
  }
View Full Code Here

Examples of org.kitesdk.data.DatasetDescriptor

          fields.add(new Schema.Field(f.name(), f.schema(), f.doc(), f.defaultValue()));
        }
        genericSchema.setFields(fields);
        schema = genericSchema;
      }
      DatasetDescriptor descriptor;
      if (datasetDefinition.getPartitionStrategy() == null) {
        descriptor = new DatasetDescriptor.Builder()
            .schema(schema)
            .format(datasetDefinition.getFormat())
            .build();
View Full Code Here

Examples of org.kitesdk.data.DatasetDescriptor

  }

  @Test
  public void testGetDatasetDescriptor() {
    datasetOperations.write(records);
    DatasetDescriptor desc1 = datasetOperations.getDatasetDescriptor(TestPojo.class);
    assertNotNull(desc1);
    DatasetDescriptor desc2 = datasetOperations.getDatasetDescriptor(RandomPojo.class);
    assertNull(desc2);
  }
View Full Code Here

Examples of org.kitesdk.data.DatasetDescriptor

    conf.set(CONF_AVRO_SCHEMA, schema.toString());
    DatasetKeyOutputFormat.configure(conf).writeTo(dataset);
  }

  private static Dataset createDataset(Schema schema, String uri) {
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schema(schema)
        .format(Formats.PARQUET)
        .build();
    return Datasets.create(uri, descriptor, GenericRecord.class);
  }
View Full Code Here

Examples of org.kitesdk.data.DatasetDescriptor

          public Dataset<GenericRecord> run() {
            return Datasets.load(uri);
          }
        });

    DatasetDescriptor descriptor = view.getDataset().getDescriptor();
    String formatName = descriptor.getFormat().getName();
    Preconditions.checkArgument(allowedFormats().contains(formatName),
        "Unsupported format: " + formatName);

    Schema newSchema = descriptor.getSchema();
    if (targetSchema == null || !newSchema.equals(targetSchema)) {
      this.targetSchema = descriptor.getSchema();
      // target dataset schema has changed, invalidate all readers based on it
      readers.invalidateAll();
    }

    this.reuseDatum = !("parquet".equals(formatName));
View Full Code Here

Examples of org.kitesdk.data.DatasetDescriptor

  @Before
  public void setup() throws Exception {
    repo = new HBaseDatasetRepository.Builder().configuration(
        HBaseTestUtils.getConf()).build();
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schemaLiteral(testEntity).build();
    ds = (DaoDataset<TestEntity>) repo.create(
        "default", tableName, descriptor, TestEntity.class);
  }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.