Package org.kitesdk.data.spi.filesystem

Examples of org.kitesdk.data.spi.filesystem.TemporaryFileSystemDatasetRepository


            PartitionStrategyParser.removeEmbeddedStrategy(datasetSchema)))
        .format("csv")
        .build();
    csvDescriptor = props.addToDescriptor(csvDescriptor);

    TemporaryFileSystemDatasetRepository repo =
        new TemporaryFileSystemDatasetRepository(getConf(),
            // ensure the same FS as the file source is used
            sourceFS.makeQualified(new Path("/tmp")),
            target.getDataset().getNamespace(),
            UUID.randomUUID().toString());

    try {
      FileSystemDataset<Record> csvDataset =
          (FileSystemDataset) repo.create("default", "csv", csvDescriptor);

      Iterator<Path> iter = csvDataset.pathIterator().iterator();
      Preconditions.checkArgument(iter.hasNext(),
          "CSV path has no data files: " + source);
      Schema csvSchema = CSVUtil.inferSchema(
          datasetSchema.getFullName(), sourceFS.open(iter.next()), props);

      if (!skipSchemaChecks) {
        Preconditions.checkArgument(
            SchemaValidationUtil.canRead(csvSchema, datasetSchema),
            "Incompatible schemas\nCSV: %s\nDataset: %s",
            csvSchema.toString(true), datasetSchema.toString(true));
        // TODO: add support for orderByHeaders
        Preconditions.checkArgument(verifyFieldOrder(csvSchema, datasetSchema),
            "Incompatible schema field order\nCSV: %s\nDataset: %s",
            csvSchema.toString(true), datasetSchema.toString(true));
      }

      TaskUtil.configure(getConf()).addJars(jars);

      TransformTask task;
      if (transform != null) {
        DoFn<Record, Record> transformFn;
        try {
          DynConstructors.Ctor<DoFn<Record, Record>> ctor =
              new DynConstructors.Builder(DoFn.class)
                  .loader(loaderForJars(jars))
                  .impl(transform)
                  .buildChecked();
          transformFn = ctor.newInstance();
        } catch (NoSuchMethodException e) {
          throw new DatasetException(
              "Cannot find no-arg constructor for class: " + transform, e);
        }
        task = new TransformTask<Record, Record>(
            csvDataset, target, transformFn);
      } else {
        task = new CopyTask<Record>(csvDataset, target);
      }

      task.setConf(getConf());

      if (noCompaction) {
        task.noCompaction();
      }

      if (numWriters >= 0) {
        task.setNumWriters(numWriters);
      }

      PipelineResult result = task.run();

      if (result.succeeded()) {
        long count = task.getCount();
        if (count > 0) {
          console.info("Added {} records to \"{}\"", count, dataset);
        }
        return 0;
      } else {
        return 1;
      }
    } finally {
      // clean up the temporary repository
      repo.delete();
    }
  }
View Full Code Here

TOP

Related Classes of org.kitesdk.data.spi.filesystem.TemporaryFileSystemDatasetRepository

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.