Package com.cloudera.cdk.data.filesystem

Source Code of com.cloudera.cdk.data.filesystem.FileSystemView

/*
* Copyright 2013 Cloudera.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.cloudera.cdk.data.filesystem;

import com.cloudera.cdk.data.DatasetException;
import com.cloudera.cdk.data.DatasetIOException;
import com.cloudera.cdk.data.DatasetReader;
import com.cloudera.cdk.data.DatasetWriter;
import com.cloudera.cdk.data.View;
import com.cloudera.cdk.data.spi.AbstractRangeView;
import com.cloudera.cdk.data.spi.StorageKey;
import com.cloudera.cdk.data.spi.MarkerRange;
import com.google.common.base.Function;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import javax.annotation.concurrent.Immutable;
import java.io.IOException;

/**
* FileSystem implementation of a range-based {@link View}.
*
* @param <E> The type of records read and written by this view.
*/
@Immutable
class FileSystemView<E> extends AbstractRangeView<E> {

  private final FileSystem fs;
  private final Path root;

  FileSystemView(FileSystemDataset<E> dataset) {
    super(dataset);
    this.fs = dataset.getFileSystem();
    this.root = dataset.getDirectory();
  }

  private FileSystemView(FileSystemView<E> view, MarkerRange range) {
    super(view, range);
    this.fs = view.fs;
    this.root = view.root;
  }

  @Override
  protected FileSystemView<E> newLimitedCopy(MarkerRange newRange) {
    return new FileSystemView<E>(this, newRange);
  }

  @Override
  public DatasetReader<E> newReader() {
    return new MultiFileDatasetReader<E>(
        fs, pathIterator(), dataset.getDescriptor());
  }

  @Override
  public DatasetWriter<E> newWriter() {
    if (dataset.getDescriptor().isPartitioned()) {
      return new PartitionedDatasetWriter<E>(this);
    } else {
      return FileSystemWriters.newFileWriter(fs, root, dataset.getDescriptor());
    }
  }

  @Override
  public boolean deleteAll() {
    PathConversion convert = new PathConversion();
    boolean deleted = false;
    for (StorageKey partition : partitionIterator()) {
      deleted = cleanlyDelete(fs, root, convert.fromKey(partition)) || deleted;
    }
    return deleted;
  }

  @Override
  @SuppressWarnings("unchecked")
  public Iterable<View<E>> getCoveringPartitions() {
    if (dataset.getDescriptor().isPartitioned()) {
      return Iterables.transform(
          partitionIterator(),
          new Function<StorageKey, View<E>>() {
            @Override
            public View<E> apply(StorageKey key) {
              if (key != null) {
                // no need for the bounds checks, use dataset.in
                return ((FileSystemDataset) dataset).of(key);
              } else {
                throw new DatasetException("[BUG] Null partition");
              }
            }
          });
    } else {
      return Lists.newArrayList((View<E>) this);
    }
  }

  PathIterator pathIterator() {
    final Iterable<Path> directories;
    if (dataset.getDescriptor().isPartitioned()) {
      directories = Iterables.transform(
          partitionIterator(),
          new Function<StorageKey, Path>() {
            private final PathConversion convert = new PathConversion();
            @Override
            public Path apply(StorageKey key) {
              if (key != null) {
                return new Path(root, convert.fromKey(key));
              } else {
                throw new DatasetException("[BUG] Null partition");
              }
            }
          });
    } else {
      directories = Lists.newArrayList(root);
    }
    return new PathIterator(fs, directories);
  }

  private FileSystemPartitionIterator partitionIterator() {
    try {
      return new FileSystemPartitionIterator(
          fs, root,
          dataset.getDescriptor().getPartitionStrategy(), range);
    } catch (IOException ex) {
      throw new DatasetException("Cannot list partitions in view:" + this, ex);
    }
  }

  private static boolean cleanlyDelete(FileSystem fs, Path root, Path dir) {
    try {
      boolean deleted = false;
      if (dir.isAbsolute()) {
        deleted = fs.delete(dir, true /* include any files */ );
      } else {
        // the path should be treated as relative to the root path
        Path absolute = new Path(root, dir);
        deleted = fs.delete(absolute, true /* include any files */ );
        // iterate up to the root, removing empty directories
        for (Path current = absolute.getParent();
             !current.equals(root) && !current.isRoot();
             current = current.getParent()) {
          final FileStatus[] stats = fs.listStatus(current);
          if (stats == null || stats.length == 0) {
            // dir is empty and should be removed
            deleted = fs.delete(current, true) || deleted;
          } else {
            // all parent directories will be non-empty
            break;
          }
        }
      }
      return deleted;
    } catch (IOException ex) {
      throw new DatasetIOException("Could not cleanly delete path:" + dir, ex);
    }
  }
}
TOP

Related Classes of com.cloudera.cdk.data.filesystem.FileSystemView

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.