Package com.cloudera.cdk.data.filesystem

Source Code of com.cloudera.cdk.data.filesystem.FileSystemDatasetWriter$Builder

/**
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.cdk.data.filesystem;

import com.cloudera.cdk.data.spi.ReaderWriterState;
import com.cloudera.cdk.data.DatasetWriter;
import com.cloudera.cdk.data.DatasetWriterException;
import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
import com.google.common.base.Supplier;
import com.google.common.io.Closeables;
import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.reflect.ReflectDatumWriter;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;

class FileSystemDatasetWriter<E> implements DatasetWriter<E> {

  private static final Logger logger = LoggerFactory
    .getLogger(FileSystemDatasetWriter.class);

  private Path path;
  private Schema schema;
  private FileSystem fileSystem;
  private boolean enableCompression;

  private Path pathTmp;
  private FSDataOutputStream out;
  private DataFileWriter<E> dataFileWriter;
  private DatumWriter<E> writer;
  private ReaderWriterState state;

  public FileSystemDatasetWriter(FileSystem fileSystem, Path path,
    Schema schema, boolean enableCompression) {

    this.fileSystem = fileSystem;
    this.path = path;
    this.pathTmp = new Path(path.getParent(), "." + path.getName() + ".tmp");
    this.schema = schema;
    this.enableCompression = enableCompression;
    this.state = ReaderWriterState.NEW;
  }

  @Override
  public void open() {
    Preconditions.checkState(state.equals(ReaderWriterState.NEW),
      "Unable to open a writer from state:%s", state);

    logger.debug(
      "Opening data file with pathTmp:{} (final path will be path:{})",
      pathTmp, path);

    writer = new ReflectDatumWriter<E>();
    dataFileWriter = new DataFileWriter<E>(writer);

    /*
     * We may want to expose the codec in the writer and simply rely on the
     * builder and proper instantiation from dataset-level configuration.
     * Hard-coding snappy seems a little too draconian.
     */
    if (enableCompression) {
      dataFileWriter.setCodec(CodecFactory.snappyCodec());
    }

    try {
      out = fileSystem.create(pathTmp, true);
      dataFileWriter.create(schema, out);
    } catch (IOException e) {
      throw new DatasetWriterException("Unable to create writer to path:" + pathTmp, e);
    }

    state = ReaderWriterState.OPEN;
  }

  @Override
  public void write(E entity) {
    Preconditions.checkState(state.equals(ReaderWriterState.OPEN),
      "Attempt to write to a writer in state:%s", state);

    try {
      dataFileWriter.append(entity);
    } catch (IOException e) {
      throw new DatasetWriterException(
        "Unable to write entity:" + entity + " with writer:" + dataFileWriter, e);
    }
  }

  @Override
  public void flush() {
    Preconditions.checkState(state.equals(ReaderWriterState.OPEN),
      "Attempt to write to a writer in state:%s", state);

    try {
      dataFileWriter.flush();
      out.hflush();
    } catch (IOException e) {
      throw new DatasetWriterException(
        "Unable to flush file writer:" + dataFileWriter);
    }
  }

  @Override
  public void close() {
    if (state.equals(ReaderWriterState.OPEN)) {
      logger.debug("Closing pathTmp:{}", pathTmp);

      try {
        Closeables.close(dataFileWriter, false);
      } catch (IOException e) {
        throw new DatasetWriterException(
          "Unable to close writer:" + dataFileWriter + " to path:" + pathTmp);
      }

      logger.debug("Committing pathTmp:{} to path:{}", pathTmp, path);

      try {
        if (!fileSystem.rename(pathTmp, path)) {
          throw new DatasetWriterException(
            "Failed to move " + pathTmp + " to " + path);
        }
      } catch (IOException e) {
        throw new DatasetWriterException(
          "Internal error while trying to commit path:" + pathTmp, e);
      }

      state = ReaderWriterState.CLOSED;
    }
  }

  @Override
  public boolean isOpen() {
    return state.equals(ReaderWriterState.OPEN);
  }

  @Override
  public String toString() {
    return Objects.toStringHelper(this)
      .add("path", path)
      .add("schema", schema)
      .add("fileSystem", fileSystem)
      .add("enableCompression", enableCompression)
      .add("pathTmp", pathTmp)
      .add("dataFileWriter", dataFileWriter)
      .add("writer", writer)
      .add("state", state)
      .toString();
  }

  public static class Builder {

    private FileSystem fileSystem;
    private Path path;
    private Schema schema;
    private boolean enableCompression;

    public Builder() {
      enableCompression = true;
    }

    public Builder fileSystem(FileSystem fileSystem) {
      this.fileSystem = fileSystem;
      return this;
    }

    public Builder path(Path path) {
      this.path = path;
      return this;
    }

    public Builder schema(Schema schema) {
      this.schema = schema;
      return this;
    }

    public Builder enableCompression(boolean enableCompression) {
      this.enableCompression = enableCompression;
      return this;
    }

    public <E> FileSystemDatasetWriter<E> build() {
      Preconditions
        .checkState(fileSystem != null, "File system is not defined");
      Preconditions.checkState(path != null, "Path is not defined");
      Preconditions.checkState(schema != null, "Schema is not defined");

      return new FileSystemDatasetWriter<E>(
        fileSystem, path, schema, enableCompression);
    }

  }

}
TOP

Related Classes of com.cloudera.cdk.data.filesystem.FileSystemDatasetWriter$Builder

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.