Examples of DatasetRepository


Examples of com.cloudera.cdk.data.DatasetRepository

    Schema schema = new Schema.Parser().parse(
        Resources.getResource("combined_log_format.avsc").openStream());

    // Create the dataset
    DatasetRepository repo = DatasetRepositories.open(datasetUri);
    DatasetDescriptor datasetDescriptor = new DatasetDescriptor.Builder()
        .schema(schema).build();
    Dataset<Object> dataset = repo.create(datasetName, datasetDescriptor);

    // Run the job
    final String schemaString = schema.toString();
    AvroType<GenericData.Record> outputType = Avros.generics(schema);
    PCollection<String> lines = readTextFile(input);
View Full Code Here

Examples of org.kitesdk.data.DatasetRepository

      throws EventDeliveryException, IOException {
    // setup a minicluster
    MiniDFSCluster cluster = new MiniDFSCluster
        .Builder(new Configuration())
        .build();
    DatasetRepository hdfsRepo = null;
    try {
      FileSystem dfs = cluster.getFileSystem();
      Configuration conf = dfs.getConf();
      String repoURI = "repo:" + conf.get("fs.defaultFS") + "/tmp/repo";

      // create a repository and dataset in HDFS
      hdfsRepo = DatasetRepositories.open(repoURI);
      hdfsRepo.create(DATASET_NAME, DESCRIPTOR);

      // update the config to use the HDFS repository
      config.put(DatasetSinkConstants.CONFIG_KITE_REPO_URI, repoURI);

      DatasetSink sink = sink(in, config);

      // run the sink
      sink.start();
      sink.process();
      sink.stop();

      Assert.assertEquals(
          Sets.newHashSet(expected),
          read(hdfsRepo.<GenericData.Record>load(DATASET_NAME)));
      Assert.assertEquals("Should have committed", 0, remaining(in));

    } finally {
      if (hdfsRepo != null && hdfsRepo.exists(DATASET_NAME)) {
        hdfsRepo.delete(DATASET_NAME);
      }
      cluster.shutdown();
    }
  }
View Full Code Here

Examples of org.kitesdk.data.spi.DatasetRepository

    repositoryUri = new URI("repo:hbase:" + zk);
  }

  @Test
  public void testBasic() {
    DatasetRepository repo = DatasetRepositories.repositoryFor(repositoryUri);
    repo.delete("default", "test");
    repo.create("default", "test", descriptor);

    RandomAccessDataset<Object> ds = Datasets
  .<Object, RandomAccessDataset<Object>>load(URI.create("dataset:hbase:" + zk + "/test"), Object.class);

    Assert.assertNotNull("Should load dataset", ds);
    Assert.assertTrue(ds instanceof DaoDataset);
    Assert.assertEquals("Descriptors should match",
        repo.load("default", "test").getDescriptor(), ds.getDescriptor());

    repo.delete("default", "test");
  }
View Full Code Here

Examples of org.kitesdk.data.spi.DatasetRepository

  }

  // TODO: Remove the need to use DatasetRepositories.repositoryFor()
  private static DatasetRepository getDatasetRepository(JobContext jobContext) {
    Configuration conf = Hadoop.JobContext.getConfiguration.invoke(jobContext);
    DatasetRepository repo = DatasetRepositories.repositoryFor(conf.get(KITE_OUTPUT_URI));
    if (repo instanceof TemporaryDatasetRepositoryAccessor) {
      Dataset<Object> dataset = load(jobContext).getDataset();
      String namespace = dataset.getNamespace();
      repo = ((TemporaryDatasetRepositoryAccessor) repo)
          .getTemporaryRepository(namespace, getJobDatasetName(jobContext));
View Full Code Here

Examples of org.kitesdk.data.spi.DatasetRepository

  @SuppressWarnings("unchecked")
  private static <E> Dataset<E> createJobDataset(JobContext jobContext) {
    Dataset<Object> dataset = load(jobContext).getDataset();
    String jobDatasetName = getJobDatasetName(jobContext);
    DatasetRepository repo = getDatasetRepository(jobContext);
    return repo.create(TEMP_NAMESPACE, jobDatasetName,
        copy(dataset.getDescriptor()),
        DatasetKeyOutputFormat.<E>getType(jobContext));
  }
View Full Code Here

Examples of org.kitesdk.data.spi.DatasetRepository

        copy(dataset.getDescriptor()),
        DatasetKeyOutputFormat.<E>getType(jobContext));
  }

  private static <E> Dataset<E> loadJobDataset(JobContext jobContext) {
    DatasetRepository repo = getDatasetRepository(jobContext);
    return repo.load(TEMP_NAMESPACE, getJobDatasetName(jobContext));
  }
View Full Code Here

Examples of org.kitesdk.data.spi.DatasetRepository

    DatasetRepository repo = getDatasetRepository(jobContext);
    return repo.load(TEMP_NAMESPACE, getJobDatasetName(jobContext));
  }

  private static void deleteJobDataset(JobContext jobContext) {
    DatasetRepository repo = getDatasetRepository(jobContext);
    repo.delete(TEMP_NAMESPACE, getJobDatasetName(jobContext));
  }
View Full Code Here

Examples of org.kitesdk.data.spi.DatasetRepository

    repo.delete(TEMP_NAMESPACE, getJobDatasetName(jobContext));
  }

  private static <E> Dataset<E> loadOrCreateTaskAttemptDataset(TaskAttemptContext taskContext) {
    String taskAttemptDatasetName = getTaskAttemptDatasetName(taskContext);
    DatasetRepository repo = getDatasetRepository(taskContext);
    Dataset<E> jobDataset = loadJobDataset(taskContext);
    if (repo.exists(TEMP_NAMESPACE, taskAttemptDatasetName)) {
      return repo.load(TEMP_NAMESPACE, taskAttemptDatasetName);
    } else {
      return repo.create(TEMP_NAMESPACE, taskAttemptDatasetName,
          copy(jobDataset.getDescriptor()));
    }
  }
View Full Code Here

Examples of org.kitesdk.data.spi.DatasetRepository

          copy(jobDataset.getDescriptor()));
    }
  }

  private static void deleteTaskAttemptDataset(TaskAttemptContext taskContext) {
    DatasetRepository repo = getDatasetRepository(taskContext);
    String taskAttemptDatasetName = getTaskAttemptDatasetName(taskContext);
    if (repo.exists(TEMP_NAMESPACE, taskAttemptDatasetName)) {
      repo.delete(TEMP_NAMESPACE, taskAttemptDatasetName);
    }
  }
View Full Code Here

Examples of org.kitesdk.data.spi.DatasetRepository

  public void testHBaseURI() throws Exception {
    String zkQuorum = HBaseTestUtils.getConf().get(HConstants.ZOOKEEPER_QUORUM);
    String zkClientPort = HBaseTestUtils.getConf().get(HConstants.ZOOKEEPER_CLIENT_PORT);
    String zk = zkQuorum + ":" + zkClientPort; // OK since zkQuorum is a single host
    URI repositoryUri = new URI("repo:hbase:" + zk);
    DatasetRepository repo = DatasetRepositories.repositoryFor(repositoryUri);

    Assert.assertNotNull("Received a repository", repo);
    assertTrue("Repo is a HBase repo", repo instanceof HBaseDatasetRepository);
    assertEquals("Repository URI", repositoryUri, repo.getUri());
  }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.