/*
* Copyright 2011-2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.data.hadoop.fs;
import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringWriter;
import java.net.URI;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.zip.GZIPInputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.ChecksumFileSystem;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.Trash;
import org.apache.hadoop.io.IOUtils;
import org.springframework.beans.factory.DisposableBean;
import org.springframework.data.hadoop.HadoopException;
import org.springframework.data.hadoop.fs.PrettyPrintList.ListPrinter;
import org.springframework.data.hadoop.fs.PrettyPrintMap.MapPrinter;
import org.springframework.util.Assert;
import org.springframework.util.FileCopyUtils;
import org.springframework.util.ObjectUtils;
import org.springframework.util.StringUtils;
/**
* HDFS FileSystem Shell supporting the 'hadoop fs/dfs [x]' commands as methods.
* See the <a href="http://hadoop.apache.org/common/docs/stable/file_system_shell.html">official guide</a> for more information.
* <p>
 * This class mimics the shell behavior as closely as possible, yet it is meant to be used programmatically,
 * that is, rather than printing out information, the methods return objects or collections that one can iterate through. If the
 * textual output is all that's needed, simply call the returned object's {@link #toString()} explicitly or implicitly (by printing
 * it out or doing string concatenation).
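 * <p>
 * A minimal usage sketch (illustrative only; the supplied {@link Configuration} is assumed to point
 * to a reachable file system and the paths are made up):
 * <pre>
 * FsShell shell = new FsShell(configuration);
 * try {
 *     shell.mkdir("/tmp/example");
 *     shell.touchz("/tmp/example/file.txt");
 *     // the returned collection renders like shell output when printed
 *     System.out.println(shell.ls("/tmp/example"));
 * } finally {
 *     shell.close();
 * }
 * </pre>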
*
* @author Hadoop's FsShell authors
* @author Costin Leau
*/
@SuppressWarnings("deprecation")
public class FsShell implements Closeable, DisposableBean {
private boolean internalFs = false;
private FileSystem fs;
private final Configuration configuration;
private Trash trash;
/**
* Constructs a new <code>FsShell</code> instance.
*
* @param configuration Hadoop configuration to use.
*/
public FsShell(Configuration configuration) {
this(configuration, null);
}
/**
* Constructs a new <code>FsShell</code> instance.
*
* @param configuration Hadoop configuration to use.
* @param fs Hadoop file system to use.
*/
public FsShell(Configuration configuration, FileSystem fs) {
this.configuration = configuration;
try {
this.fs = (fs != null ? fs : FileSystem.get(configuration));
this.internalFs = (fs == null);
this.trash = new Trash(configuration);
} catch (IOException ex) {
throw new HadoopException("Cannot create shell " + ex.getMessage(), ex);
}
}
@Override
public void destroy() throws Exception {
close();
}
@Override
public void close() throws IOException {
if (internalFs && fs != null) {
fs.close();
fs = null;
}
}
private String getContent(InputStream in) throws IOException {
StringWriter writer = new StringWriter(in.available());
InputStreamReader reader = new InputStreamReader(in, "UTF-8");
FileCopyUtils.copy(reader, writer);
return writer.toString();
}
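	/**
	 * Mimics {@code hadoop fs -cat}: returns the paths matching the given URIs as a
	 * collection whose {@code toString()} renders each file's content.
	 */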
public Collection<Path> cat(String uri) {
return cat(new String[] { uri });
}
public Collection<Path> cat(String... uris) {
final Collection<Path> results = new PrettyPrintList<Path>(new ListPrinter<Path>() {
@Override
public String toString(Path e) throws IOException {
try {
final FileSystem srcFS = getFS(e);
return getContent(srcFS.open(e));
} catch (IOException ex) {
return "No such file or directory " + e.toUri();
}
}
});
try {
if (!ObjectUtils.isEmpty(uris)) {
for (String uri : uris) {
Path src = new Path(uri);
FileSystem srcFS = getFS(src);
results.addAll(Arrays.asList(FileUtil.stat2Paths(srcFS.globStatus(src), src)));
}
}
} catch (IOException ex) {
throw new HadoopException("Cannot execute command " + ex.getMessage(), ex);
}
return Collections.unmodifiableCollection(results);
}
	public void chgrp(String group, String uri) {
		chgrp(group, new String[] { uri });
	}
public void chgrp(String group, String... uris) {
chgrp(false, group, uris);
}
	public void chgrpr(String group, String uri) {
		chgrpr(group, new String[] { uri });
	}
public void chgrpr(String group, String... uris) {
chgrp(true, group, uris);
}
public void chgrp(boolean recursive, String group, String uri) {
chgrp(recursive, group, new String[] { uri });
}
public void chgrp(boolean recursive, String group, String... uris) {
FsShellPermissions.changePermissions(fs, configuration, FsShellPermissions.Op.CHGRP, recursive, group, uris);
}
public void chmod(String mode, String uri) {
chmod(mode, new String[] { uri });
}
public void chmod(String mode, String... uris) {
chmod(false, mode, uris);
}
public void chmodr(String mode, String uri) {
chmodr(mode, new String[] { uri });
}
public void chmodr(String mode, String... uris) {
chmod(true, mode, uris);
}
public void chmodr(Integer mode, String uri) {
chmodr(mode, new String[] { uri });
}
public void chmodr(Integer mode, String... uris) {
chmod(true, String.valueOf(mode), uris);
}
public void chmod(Integer mode, String uri) {
chmod(mode, new String[] { uri });
}
public void chmod(Integer mode, String... uris) {
chmod(false, String.valueOf(mode), uris);
}
public void chmod(boolean recursive, Integer mode, String uri) {
chmod(recursive, mode, new String[] { uri });
}
public void chmod(boolean recursive, Integer mode, String... uris) {
chmod(recursive, String.valueOf(mode), uris);
}
public void chmod(boolean recursive, String mode, String uri) {
chmod(recursive, mode, new String[] { uri });
}
public void chmod(boolean recursive, String mode, String... uris) {
FsShellPermissions.changePermissions(fs, configuration, FsShellPermissions.Op.CHMOD, recursive, mode, uris);
}
	public void chown(String owner, String uri) {
		chown(owner, new String[] { uri });
	}
	public void chown(String owner, String... uris) {
		chown(false, owner, uris);
	}
	public void chownr(String owner, String uri) {
		chownr(owner, new String[] { uri });
	}
	public void chownr(String owner, String... uris) {
		chown(true, owner, uris);
	}
public void chown(boolean recursive, String owner, String uri) {
chown(recursive, owner, new String[] { uri });
}
public void chown(boolean recursive, String owner, String... uris) {
FsShellPermissions.changePermissions(fs, configuration, FsShellPermissions.Op.CHOWN, recursive, owner, uris);
}
public void copyFromLocal(String src, String dst) {
copyFromLocal(src, dst, (String[]) null);
}
public void copyFromLocal(String src, String src2, String... dst) {
Object[] va = parseVarargs(src, src2, dst);
@SuppressWarnings("unchecked")
List<Path> srcs = (List<Path>) va[0];
Path dstPath = (Path) va[1];
try {
FileSystem dstFs = getFS(dstPath);
dstFs.copyFromLocalFile(false, false, srcs.toArray(new Path[srcs.size()]), dstPath);
} catch (IOException ex) {
throw new HadoopException("Cannot copy resources " + ex.getMessage(), ex);
}
}
	public void copyToLocal(String src, String localdst) {
		copyToLocal(false, false, src, localdst);
	}
public void copyToLocal(boolean ignorecrc, boolean crc, String src, String localdst) {
File dst = new File(localdst);
Path srcpath = new Path(src);
try {
FileSystem srcFs = getFS(srcpath);
			// verify checksums unless explicitly told to ignore them
			srcFs.setVerifyChecksum(!ignorecrc);
if (crc && !(srcFs instanceof ChecksumFileSystem)) {
crc = false;
}
			FileStatus[] srcs = srcFs.globStatus(srcpath);
			if (ObjectUtils.isEmpty(srcs)) {
				throw new IOException(src + ": No such file or directory");
			}
			boolean dstIsDir = dst.isDirectory();
			if (srcs.length > 1 && !dstIsDir) {
				throw new IllegalArgumentException("When copying multiple files, "
						+ "destination should be a directory.");
			}
for (FileStatus status : srcs) {
Path p = status.getPath();
File f = dstIsDir ? new File(dst, p.getName()) : dst;
copyToLocal(srcFs, p, f, crc);
}
} catch (IOException ex) {
throw new HadoopException("Cannot copy resources " + ex.getMessage(), ex);
}
}
// copied from FsShell
private void copyToLocal(final FileSystem srcFS, final Path src, final File dst, final boolean copyCrc)
throws IOException {
final String COPYTOLOCAL_PREFIX = "_copyToLocal_";
/* Keep the structure similar to ChecksumFileSystem.copyToLocal().
* Ideal these two should just invoke FileUtil.copy() and not repeat
* recursion here. Of course, copy() should support two more options :
* copyCrc and useTmpFile (may be useTmpFile need not be an option).
*/
if (!srcFS.getFileStatus(src).isDir()) {
if (dst.exists()) {
// match the error message in FileUtil.checkDest():
throw new IOException("Target " + dst + " already exists");
}
// use absolute name so that tmp file is always created under dest dir
File tmp = FileUtil.createLocalTempFile(dst.getAbsoluteFile(), COPYTOLOCAL_PREFIX, true);
if (!FileUtil.copy(srcFS, src, tmp, false, srcFS.getConf())) {
throw new IOException("Failed to copy " + src + " to " + dst);
}
if (!tmp.renameTo(dst)) {
throw new IOException("Failed to rename tmp file " + tmp + " to local destination \"" + dst + "\".");
}
if (copyCrc) {
if (!(srcFS instanceof ChecksumFileSystem)) {
throw new IOException("Source file system does not have crc files");
}
ChecksumFileSystem csfs = (ChecksumFileSystem) srcFS;
File dstcs = FileSystem.getLocal(srcFS.getConf()).pathToFile(
csfs.getChecksumFile(new Path(dst.getCanonicalPath())));
copyToLocal(csfs.getRawFileSystem(), csfs.getChecksumFile(src), dstcs, false);
}
}
else {
// once FileUtil.copy() supports tmp file, we don't need to mkdirs().
dst.mkdirs();
for (FileStatus path : srcFS.listStatus(src)) {
copyToLocal(srcFS, path.getPath(), new File(dst, path.getPath().getName()), copyCrc);
}
}
}
public Map<Path, ContentSummary> count(String uri) {
return count(new String[] { uri });
}
public Map<Path, ContentSummary> count(String... uris) {
return count(false, uris);
}
public Map<Path, ContentSummary> count(final boolean quota, String uri) {
return count(quota, new String[] { uri });
}
public Map<Path, ContentSummary> count(final boolean quota, String... uris) {
final Map<Path, ContentSummary> results = new PrettyPrintMap<Path, ContentSummary>(uris.length,
new MapPrinter<Path, ContentSummary>() {
@Override
public String toString(Path p, ContentSummary c) throws IOException {
return c.toString(quota) + p;
}
});
for (String src : uris) {
try {
Path srcPath = new Path(src);
final FileSystem fs = getFS(srcPath);
FileStatus[] statuses = fs.globStatus(srcPath);
				Assert.notEmpty(statuses, "Cannot find listing for " + src);
for (FileStatus s : statuses) {
Path p = s.getPath();
results.put(p, fs.getContentSummary(p));
}
} catch (IOException ex) {
throw new HadoopException("Cannot find listing " + ex.getMessage(), ex);
}
}
return Collections.unmodifiableMap(results);
}
public void cp(String src, String dst) {
cp(src, dst, (String[]) null);
}
public void cp(String src, String src2, String... dst) {
Object[] va = parseVarargs(src, src2, dst);
@SuppressWarnings("unchecked")
List<Path> srcs = (List<Path>) va[0];
Path dstPath = (Path) va[1];
try {
FileSystem dstFs = dstPath.getFileSystem(configuration);
boolean isDestDir = !dstFs.isFile(dstPath);
			if (srcs.size() > 1 && !isDestDir) {
				throw new IllegalArgumentException("When copying multiple files, destination " + dstPath.toUri()
						+ " should be a directory.");
			}
for (Path path : srcs) {
FileSystem srcFs = path.getFileSystem(configuration);
Path[] from = FileUtil.stat2Paths(srcFs.globStatus(path), path);
if (!ObjectUtils.isEmpty(from) && from.length > 1 && !isDestDir) {
throw new IllegalArgumentException(
"When copying multiple files, destination should be a directory.");
}
for (Path fromPath : from) {
FileUtil.copy(srcFs, fromPath, dstFs, dstPath, false, configuration);
}
}
} catch (IOException ex) {
throw new HadoopException("Cannot copy resources " + ex.getMessage(), ex);
}
}
public Map<Path, Long> du(String uri) {
return du(new String[] { uri });
}
public Map<Path, Long> du(String... uris) {
return du(false, uris);
}
public Map<Path, Long> du(final boolean summary, String string) {
return du(summary, new String[] { string });
}
public Map<Path, Long> du(final boolean summary, String... strings) {
if (ObjectUtils.isEmpty(strings)) {
strings = new String[] { "." };
}
final int BORDER = 2;
Map<Path, Long> results = new PrettyPrintMap<Path, Long>(strings.length, new MapPrinter<Path, Long>() {
@Override
public String toString(Path path, Long size) throws Exception {
if (summary) {
return ("".equals(path) ? "." : path) + "\t" + size;
}
return String.format("%-" + (10 + BORDER) + "d", size) + path;
}
});
try {
for (String src : strings) {
Path srcPath = new Path(src);
FileSystem srcFs = getFS(srcPath);
FileStatus[] fileStatus = srcFs.globStatus(srcPath);
if (summary) {
for (FileStatus status : fileStatus) {
results.put(status.getPath(), srcFs.getContentSummary(status.getPath()).getLength());
}
}
else {
FileStatus items[] = srcFs.listStatus(FileUtil.stat2Paths(fileStatus, srcPath));
if (ObjectUtils.isEmpty(items) && (!srcFs.exists(srcPath))) {
throw new HadoopException("Cannot access " + src + ": No such file or directory.");
}
for (FileStatus status : items) {
Long size = (status.isDir() ? srcFs.getContentSummary(status.getPath()).getLength() : status.getLen());
results.put(status.getPath(), size);
}
}
}
} catch (IOException ex) {
throw new HadoopException("Cannot inspect resources " + ex.getMessage(), ex);
}
return Collections.unmodifiableMap(results);
}
public Map<Path, Long> dus(String string) {
return dus(new String[] { string });
}
public Map<Path, Long> dus(String... strings) {
return du(true, strings);
}
public void expunge() {
try {
trash.expunge();
trash.checkpoint();
} catch (IOException ex) {
throw new HadoopException("Cannot expunge trash" + ex.getMessage(), ex);
}
}
public void get(String src, String dst) {
copyToLocal(src, dst);
}
public void get(boolean ignorecrc, boolean crc, String src, String dst) {
copyToLocal(ignorecrc, crc, src, dst);
}
public void getmerge(String src, String localdst) {
getmerge(src, localdst, false);
}
public void getmerge(String src, String localdst, boolean addnl) {
Path srcPath = new Path(src);
Path dst = new Path(localdst);
try {
FileSystem srcFs = getFS(srcPath);
Path[] srcs = FileUtil.stat2Paths(srcFs.globStatus(srcPath), srcPath);
			for (int i = 0; i < srcs.length; i++) {
				FileUtil.copyMerge(srcFs, srcs[i], FileSystem.getLocal(configuration), dst, false, configuration,
						(addnl ? "\n" : null));
			}
} catch (IOException ex) {
throw new HadoopException("Cannot getmerge " + ex.getMessage(), ex);
}
}
public Collection<FileStatus> ls(String match) {
return ls(false, new String[] { match });
}
public Collection<FileStatus> ls(String... match) {
return ls(false, match);
}
public Collection<FileStatus> ls(boolean recursive, String match) {
return ls(recursive, new String[] { match });
}
public Collection<FileStatus> ls(boolean recursive, String... match) {
Collection<FileStatus> results = new PrettyPrintList<FileStatus>(new ListPrinter<FileStatus>() {
@Override
public String toString(FileStatus stat) throws Exception {
final SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm");
int maxReplication = 3, maxLen = 10, maxOwner = 10, maxGroup = 10;
StringBuilder sb = new StringBuilder();
sb.append((stat.isDir() ? "d" : "-") + stat.getPermission() + " ");
sb.append(String.format("%" + maxReplication + "s ", (!stat.isDir() ? stat.getReplication() : "-")));
sb.append(String.format("%-" + maxOwner + "s ", stat.getOwner()));
sb.append(String.format("%-" + maxGroup + "s ", stat.getGroup()));
sb.append(String.format("%" + maxLen + "d ", stat.getLen()));
sb.append(df.format(new Date(stat.getModificationTime())) + " ");
sb.append(stat.getPath().toUri().getPath());
return sb.toString();
}
});
try {
for (String src : match) {
Path srcPath = new Path(src);
FileSystem srcFs = getFS(srcPath);
FileStatus[] srcs = srcFs.globStatus(srcPath);
if (!ObjectUtils.isEmpty(srcs)) {
for (FileStatus status : srcs) {
ls(status, srcFs, recursive, results);
}
}
else {
throw new IllegalArgumentException("Cannot access " + srcPath + ": No such file or directory.");
}
}
return Collections.unmodifiableCollection(results);
} catch (IOException ex) {
throw new HadoopException("Cannot list resources " + ex.getMessage(), ex);
}
}
private void ls(FileStatus src, FileSystem srcFs, boolean recursive, Collection<FileStatus> results)
throws IOException {
results.add(src);
if (src.isDir()) {
final FileStatus[] items = srcFs.listStatus(src.getPath());
if (!ObjectUtils.isEmpty(items)) {
for (FileStatus stat : items) {
if (recursive && stat.isDir()) {
ls(stat, srcFs, recursive, results);
}
else {
results.add(stat);
}
}
}
}
}
public Collection<FileStatus> lsr(String match) {
return ls(true, match);
}
public Collection<FileStatus> lsr(String... match) {
return ls(true, match);
}
public void mkdir(String uri) {
mkdir(new String[] { uri });
}
public void mkdir(String... uris) {
for (String src : uris) {
try {
Path p = new Path(src);
FileSystem srcFs = getFS(p);
FileStatus fstatus = null;
try {
fstatus = srcFs.getFileStatus(p);
if (fstatus.isDir()) {
throw new IllegalStateException("Cannot create directory " + src + ": File exists");
}
else {
throw new IllegalStateException(src + " exists but is not a directory");
}
} catch (FileNotFoundException e) {
if (!srcFs.mkdirs(p)) {
throw new HadoopException("Failed to create " + src);
}
}
} catch (IOException ex) {
throw new HadoopException("Cannot create directory " + ex.getMessage(), ex);
}
}
}
public void moveFromLocal(String localsrc, String dst) {
moveFromLocal(localsrc, dst, (String[]) null);
}
public void moveFromLocal(String localsrc, String localsrc2, String... dst) {
Object[] va = parseVarargs(localsrc, localsrc2, dst);
@SuppressWarnings("unchecked")
List<Path> srcs = (List<Path>) va[0];
Path dstPath = (Path) va[1];
try {
FileSystem dstFs = dstPath.getFileSystem(configuration);
dstFs.moveFromLocalFile(srcs.toArray(new Path[srcs.size()]), dstPath);
} catch (IOException ex) {
throw new HadoopException("Cannot move resources " + ex.getMessage(), ex);
}
}
public void moveToLocal(String src, String dst) {
moveToLocal(false, src, dst);
}
public void moveToLocal(boolean crc, String src, String dst) {
throw new UnsupportedOperationException("Option 'moveToLocal' is not implemented yet.");
}
public void mv(String src, String dst) {
mv(src, dst, (String[]) null);
}
public void mv(String src, String src2, String... dst) {
Object[] va = parseVarargs(src, src2, dst);
@SuppressWarnings({ "unchecked" })
List<Path> sources = (List<Path>) va[0];
Path dstPath = (Path) va[1];
try {
FileSystem dstFs = getFS(dstPath);
boolean isDstDir = !dstFs.isFile(dstPath);
if (sources.size() > 1 && !isDstDir) {
throw new IllegalArgumentException("Destination must be a dir when moving multiple files");
}
for (Path srcPath : sources) {
FileSystem srcFs = getFS(srcPath);
URI srcURI = srcFs.getUri();
URI dstURI = dstFs.getUri();
if (srcURI.compareTo(dstURI) != 0) {
throw new IllegalArgumentException("src and destination filesystems do not match.");
}
Path[] srcs = FileUtil.stat2Paths(srcFs.globStatus(srcPath), srcPath);
if (srcs.length > 1 && !isDstDir) {
throw new IllegalArgumentException("When moving multiple files, destination should be a directory.");
}
for (Path s : srcs) {
if (!srcFs.rename(s, dstPath)) {
FileStatus srcFstatus = null;
FileStatus dstFstatus = null;
try {
srcFstatus = srcFs.getFileStatus(s);
} catch (FileNotFoundException e) {
// ignore
}
try {
dstFstatus = dstFs.getFileStatus(dstPath);
					} catch (IOException e) {
						// ignore - dstFstatus stays null
					}
if ((srcFstatus != null) && (dstFstatus != null)) {
if (srcFstatus.isDir() && !dstFstatus.isDir()) {
throw new IllegalArgumentException("cannot overwrite non directory " + dstPath
+ " with directory " + s);
}
}
throw new HadoopException("Failed to rename " + s + " to " + dstPath);
}
}
}
} catch (IOException ex) {
throw new HadoopException("Cannot rename resources " + ex.getMessage(), ex);
}
}
public void put(String localsrc, String dst) {
put(localsrc, dst, (String[]) null);
}
public void put(String localsrc, String localsrc2, String... dst) {
copyFromLocal(localsrc, localsrc2, dst);
}
public void rm(String uri) {
rm(new String[] { uri });
}
public void rm(String... uris) {
rm(false, false, uris);
}
public void rm(boolean recursive, String uri) {
rm(recursive, new String[] { uri });
}
public void rm(boolean recursive, String... uris) {
rm(recursive, false, uris);
}
public void rm(boolean recursive, boolean skipTrash, String... uris) {
for (String uri : uris) {
try {
Path src = new Path(uri);
FileSystem srcFs = getFS(src);
for (Path p : FileUtil.stat2Paths(srcFs.globStatus(src), src)) {
FileStatus status = srcFs.getFileStatus(p);
if (status.isDir() && !recursive) {
throw new IllegalStateException("Cannot remove directory \"" + src
+ "\", if recursive deletion was not specified");
}
				if (!skipTrash) {
					try {
						Trash trashTmp = new Trash(srcFs, configuration);
						if (trashTmp.moveToTrash(p)) {
							// moved to trash - no delete needed
							continue;
						}
					} catch (IOException ex) {
						throw new HadoopException("Cannot move to Trash resource " + p, ex);
					}
				}
				srcFs.delete(p, recursive);
}
} catch (IOException ex) {
throw new HadoopException("Cannot delete (all) resources " + ex.getMessage(), ex);
}
}
}
public void rmr(String uri) {
rmr(new String[] { uri });
}
public void rmr(String... uris) {
rm(true, false, uris);
}
public void rmr(boolean skipTrash, String uri) {
rmr(skipTrash, new String[] { uri });
}
public void rmr(boolean skipTrash, String... uris) {
rm(true, skipTrash, uris);
}
public void setrep(short replication, String uri) {
setrep(replication, new String[] { uri });
}
public void setrep(short replication, String... uris) {
setrep(false, replication, uris);
}
public void setrep(boolean recursive, short replication, String uri) {
setrep(recursive, replication, new String[] { uri });
}
public void setrep(boolean recursive, short replication, String... uris) {
setrep(-1, recursive, replication, uris);
}
public void setrepr(short replication, String... uris) {
setrep(-1, true, replication, uris);
}
public void setrepr(short replication, String uri) {
setrepr(replication, new String[] { uri });
}
public void setrepr(long secondsToWait, short replication, String uri) {
setrepr(secondsToWait, replication, new String[] { uri });
}
public void setrepr(long secondsToWait, short replication, String... uris) {
setrep(secondsToWait, true, replication, uris);
}
public void setrep(long secondsToWait, boolean recursive, short replication, String uri) {
setrep(secondsToWait, recursive, replication, new String[] { uri });
}
public void setrep(long secondsToWait, boolean recursive, short replication, String... uris) {
Assert.isTrue(replication >= 1, "Replication must be >=1");
List<Path> waitList = (secondsToWait >= 0 ? new ArrayList<Path>() : null);
try {
for (String uri : uris) {
Path srcPath = new Path(uri);
FileSystem srcFs = getFS(srcPath);
Path[] srcs = FileUtil.stat2Paths(srcFs.globStatus(srcPath), srcPath);
for (Path src : srcs) {
setrep(replication, recursive, srcFs, src, waitList);
}
}
if (waitList != null) {
boolean waitUntilDone = (secondsToWait == 0);
long timeLeft = TimeUnit.SECONDS.toMillis(secondsToWait);
for (Path path : waitList) {
FileSystem srcFs = getFS(path);
FileStatus status = srcFs.getFileStatus(path);
long len = status.getLen();
boolean done = false;
while (!done) {
BlockLocation[] locations = srcFs.getFileBlockLocations(status, 0, len);
					// count the blocks that already carry the target replication
					int i = 0;
					for (; i < locations.length && locations[i].getHosts().length == replication; i++) {
					}
					done = (i == locations.length);
					if (!done && (waitUntilDone || timeLeft > 5000)) {
						try {
							// sleep for 10s between polls
							Thread.sleep(10000);
						} catch (InterruptedException e) {
							return;
						}
						timeLeft -= 10000;
					}
					else if (!done) {
						// wait budget exhausted - give up on this file
						break;
					}
}
}
}
} catch (IOException ex) {
throw new HadoopException("Cannot set replication " + ex.getMessage(), ex);
}
}
private void setrep(short replication, boolean recursive, FileSystem srcFs, Path src, List<Path> waitList)
throws IOException {
if (srcFs.isFile(src)) {
if (srcFs.setReplication(src, replication)) {
if (waitList != null) {
waitList.add(src);
}
}
else {
throw new HadoopException("Cannot set replication for " + src);
}
}
else {
if (recursive) {
FileStatus items[] = srcFs.listStatus(src);
if (!ObjectUtils.isEmpty(items)) {
for (FileStatus status : items) {
setrep(replication, recursive, srcFs, status.getPath(), waitList);
}
}
}
}
}
public boolean test(String uri) {
return test(true, false, false, uri);
}
public boolean test(boolean exists, boolean zero, boolean directory, String uri) {
Path f = new Path(uri);
boolean result = true;
try {
FileSystem srcFs = getFS(f);
// mandatory check - if this fails, so will the others (with a NPE most likely)
result = srcFs.exists(f);
if (result && zero) {
result &= srcFs.getFileStatus(f).getLen() == 0;
}
if (result && directory) {
result &= srcFs.getFileStatus(f).isDir();
}
return result;
} catch (IOException ex) {
throw new HadoopException("Cannot test resource " + uri + ";" + ex.getMessage(), ex);
}
}
public Collection<String> text(String uri) {
return text(new String[] { uri });
}
public Collection<String> text(String... uris) {
Collection<String> texts = new PrettyPrintList<String>(new ListPrinter<String>() {
@Override
public String toString(String e) throws Exception {
return e + "\n";
}
});
for (String uri : uris) {
InputStream in = null;
FSDataInputStream i = null;
try {
Path srcPat = new Path(uri);
FileSystem srcFs = getFS(srcPat);
for (Path src : FileUtil.stat2Paths(srcFs.globStatus(srcPat), srcPat)) {
Assert.isTrue(srcFs.isFile(src), "Source must be a file");
					i = srcFs.open(src);
					// sniff the first two bytes for a known magic number
					switch (i.readShort()) {
					case 0x1f8b: // RFC 1952 (gzip)
						i.seek(0);
						in = new GZIPInputStream(i);
						break;
					case 0x5345: // 'S' 'E' (sequence file)
						if (i.readByte() == 'Q') {
							i.close();
							in = new TextRecordInputStream(src, srcFs, configuration);
							break;
						}
						// not a sequence file - fall through and treat as plain text
					default:
						i.seek(0);
						in = i;
						break;
					}
					texts.add(getContent(in));
}
} catch (IOException ex) {
throw new HadoopException("Cannot read " + uri + ";" + ex.getMessage(), ex);
} finally {
IOUtils.closeStream(in);
IOUtils.closeStream(i);
}
}
return texts;
}
public void touchz(String uri) {
touchz(new String[] { uri });
}
public void touchz(String... uris) {
for (String uri : uris) {
try {
Path src = new Path(uri);
FileSystem srcFs = getFS(src);
FileStatus st;
if (srcFs.exists(src)) {
st = srcFs.getFileStatus(src);
if (st.isDir()) {
// TODO: handle this
throw new IllegalArgumentException(src + " is a directory");
}
				else if (st.getLen() != 0) {
					throw new IllegalArgumentException(src + " must be a zero-length file");
				}
}
else {
IOUtils.closeStream(srcFs.create(src));
}
} catch (IOException ex) {
throw new HadoopException("Cannot touchz " + uri + ";" + ex.getMessage(), ex);
}
}
}
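	/**
	 * Splits the shell-style varargs into sources and a destination: the last element
	 * of {@code dst} is the destination (or {@code src2} when {@code dst} is empty);
	 * everything before it is treated as a source path.
	 */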
private static Object[] parseVarargs(String src1, String src2, String... dst) {
Assert.hasText(src1, "at least one valid source path needs to be specified");
Path dstPath = null;
// create src path
List<Path> srcs = new ArrayList<Path>();
srcs.add(new Path(src1));
if (!ObjectUtils.isEmpty(dst)) {
srcs.add(new Path(src2));
for (int i = 0; i < dst.length - 1; i++) {
srcs.add(new Path(dst[i]));
}
dstPath = new Path(dst[dst.length - 1]);
}
else {
dstPath = new Path(src2);
}
return new Object[] { srcs, dstPath };
}
/**
	 * Utility that checks whether the given path has a URI scheme - if it doesn't, it falls back
	 * to the specified FS (rather than always HDFS, as Hadoop does).
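	 * For example, {@code file:///tmp} resolves to the local file system, while a
	 * scheme-less path such as {@code /tmp} uses the shell's configured file system.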
*
* @param path path
* @return associated file system
*/
private FileSystem getFS(Path path) throws IOException {
if (StringUtils.hasText(path.toUri().getScheme())) {
return path.getFileSystem(configuration);
}
return fs;
}
}