/*
* Copyright 2013 Cloudera.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.cdk.data.filesystem;
import com.cloudera.cdk.data.DatasetException;
import com.cloudera.cdk.data.FieldPartitioner;
import com.cloudera.cdk.data.PartitionStrategy;
import com.cloudera.cdk.data.spi.StorageKey;
import com.cloudera.cdk.data.spi.MarkerRange;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
/**
 * Iterates over the partitions found under a root directory, exposing each
 * one as a {@link StorageKey} and keeping only the keys contained in a
 * {@link MarkerRange}.
 *
 * <p>This class is both an {@link Iterator} and an {@link Iterable};
 * {@link #iterator()} returns {@code this}, so an instance can be traversed
 * only once.
 *
 * <p>The keys handed out by {@link #next()} come from {@link MakeKey}, which
 * reuses a single {@link StorageKey} instance: the object returned by one
 * call is overwritten when the iterator advances. Callers that need to keep
 * a key must copy it first.
 */
class FileSystemPartitionIterator implements Iterator<StorageKey>, Iterable<StorageKey> {

  private static final Logger logger = LoggerFactory.getLogger(
      FileSystemPartitionIterator.class);

  private final FileSystem fs;
  private final Path rootDirectory;
  private final Iterator<StorageKey> iterator;

  /**
   * Supplies the directory names for each level of the partition tree.
   *
   * <p>This is a non-static inner class: it reads {@code fs} and
   * {@code rootDirectory} from the enclosing instance, so those fields must
   * be assigned before an instance is created.
   *
   * <p>NOTE(review): presumably {@code MultiLevelIterator} walks
   * {@code depth} levels and calls {@link #getLevel(List)} with the names
   * chosen so far -- confirm against {@code MultiLevelIterator}.
   */
  class FileSystemIterator extends MultiLevelIterator<String> {

    public FileSystemIterator(int depth) throws IOException {
      super(depth);
    }

    /**
     * Lists the names of the child directories of the directory reached by
     * resolving each entry of {@code current}, in order, against the root
     * directory.
     *
     * @param current directory names leading from the root to the directory
     *                to list; an empty list means the root itself
     * @return the child directory names, in the order the file system
     *         returned them
     * @throws DatasetException if the directory cannot be listed
     */
    @Override
    @SuppressWarnings("unchecked")
    public Iterable<String> getLevel(List<String> current) {
      // LinkedHashSet keeps the listing order while dropping duplicates
      final Set<String> dirs = Sets.newLinkedHashSet();

      Path dir = rootDirectory;
      for (int i = 0, n = current.size(); i < n; i += 1) {
        dir = new Path(dir, current.get(i));
      }

      try {
        // only entries accepted by PathFilters.notHidden() are considered
        for (FileStatus stat : fs.listStatus(dir, PathFilters.notHidden())) {
          if (stat.isDirectory()) {
            // TODO: add a check here for range.couldContain(Marker)
            dirs.add(stat.getPath().getName());
          }
        }
      } catch (IOException ex) {
        throw new DatasetException("Cannot list directory:" + dir, ex);
      }

      return dirs;
    }
  }

  /**
   * Predicate to determine whether a {@link com.cloudera.cdk.data.spi.StorageKey} is in a {@link MarkerRange}.
   */
  private static class InRange implements Predicate<StorageKey> {
    private final MarkerRange range;

    public InRange(MarkerRange range) {
      this.range = range;
    }

    @Override
    public boolean apply(StorageKey key) {
      return range.contains(key);
    }
  }

  /**
   * Conversion function to transform a List into a {@link com.cloudera.cdk.data.spi.StorageKey}.
   *
   * <p>The same {@link StorageKey} instance is returned from every call to
   * {@link #apply(List)}; only its values are replaced.
   */
  private static class MakeKey implements Function<List<String>, StorageKey> {
    private final List<FieldPartitioner> partitioners;
    private final StorageKey reusableKey;
    private final PathConversion convert;

    public MakeKey(PartitionStrategy strategy) {
      this.partitioners = strategy.getFieldPartitioners();
      this.reusableKey = new StorageKey(strategy);
      this.convert = new PathConversion();
    }

    /**
     * Converts one directory name per field partitioner into a value and
     * stores the values in the shared key.
     *
     * @param dirs directory names, one per field partitioner, in
     *             partitioner order
     * @return the shared, reused {@link StorageKey} holding the new values
     */
    @Override
    @SuppressWarnings("unchecked")
    @edu.umd.cs.findbugs.annotations.SuppressWarnings(
        value="NP_PARAMETER_MUST_BE_NONNULL_BUT_MARKED_AS_NULLABLE",
        justification="Non-null @Nullable parameter inherited from Function")
    public StorageKey apply(List<String> dirs) {
      List<Object> values = Lists.newArrayListWithCapacity(dirs.size());
      // dirs.get(i) is the directory name produced for partitioners.get(i)
      for (int i = 0, n = partitioners.size(); i < n; i += 1) {
        values.add(convert.valueForDirname(partitioners.get(i), dirs.get(i)));
      }
      reusableKey.replaceValues(values);
      return reusableKey;
    }
  }

  /**
   * Creates an iterator over the partitions under {@code root} whose keys
   * are contained in {@code range}.
   *
   * @param fs       the file system that holds {@code root}
   * @param root     the directory under which partition directories live
   * @param strategy the partition strategy; its number of field partitioners
   *                 is the directory depth that is walked
   * @param range    only keys for which {@code range.contains(key)} is true
   *                 are returned
   * @throws IOException if checking {@code root} fails or the directory
   *                     iterator cannot be created
   * @throws IllegalArgumentException if {@code root} is not a directory
   */
  FileSystemPartitionIterator(
      FileSystem fs, Path root, PartitionStrategy strategy, MarkerRange range)
      throws IOException {
    // name the offending path so a failure can be diagnosed from the message
    Preconditions.checkArgument(fs.isDirectory(root),
        "Root directory %s is not a directory", root);
    // fs and rootDirectory must be set before FileSystemIterator is created,
    // because the inner class reads them from this instance
    this.fs = fs;
    this.rootDirectory = root;
    this.iterator = Iterators.filter(
        Iterators.transform(
            new FileSystemIterator(strategy.getFieldPartitioners().size()),
            new MakeKey(strategy)),
        new InRange(range));
  }

  @Override
  public boolean hasNext() {
    return iterator.hasNext();
  }

  /**
   * Returns the next key in range. The returned object is reused by
   * subsequent calls; see the note on {@link MakeKey}.
   */
  @Override
  public StorageKey next() {
    return iterator.next();
  }

  /**
   * Delegates to the underlying iterator.
   *
   * <p>NOTE(review): the underlying iterator is built with Guava's
   * {@code Iterators.filter}, which is expected to be unmodifiable, so this
   * likely throws {@link UnsupportedOperationException} -- confirm.
   */
  @Override
  public void remove() {
    iterator.remove();
  }

  /**
   * Returns this object; it does not create a fresh iterator, so a second
   * traversal continues from wherever the first one stopped.
   */
  @Override
  public Iterator<StorageKey> iterator() {
    return this;
  }
}