/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.api.common.io;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.core.fs.BlockLocation;
import eu.stratosphere.core.fs.FileInputSplit;
import eu.stratosphere.core.fs.FileStatus;
import eu.stratosphere.core.fs.FileSystem;
import eu.stratosphere.core.fs.FileSystem.WriteMode;
import eu.stratosphere.core.fs.Path;
import eu.stratosphere.core.io.InputSplit;
import eu.stratosphere.util.ReflectionUtil;
/**
 * Provides convenience methods to deal with I/O operations related to {@link InputFormat} and {@link OutputFormat}.
 * <p>
 * Every {@code open*} method instantiates the requested format class via {@link ReflectionUtil}, configures it and
 * calls its {@code open} method before returning it. None of the methods closes the formats it returns; that is left
 * to the caller.
 */
public class FormatUtil {

    /**
     * Creates an {@link InputFormat} from a given class for the specified file. The optional {@link Configuration}
     * initializes the format.
     * <p>
     * The format is opened on a single split that spans the whole file (offset 0 up to the file length).
     *
     * @param <T>
     *        the element type of the format, i.e. the {@code T} in {@code FileInputFormat<T>}
     * @param <F>
     *        the concrete type of the InputFormat
     * @param inputFormatClass
     *        the class of the InputFormat
     * @param path
     *        the path of the file
     * @param configuration
     *        optional configuration of the InputFormat; {@code null} is replaced by an empty configuration
     * @return the created {@link InputFormat}
     * @throws IOException
     *         if an I/O error occurred while accessing the file or initializing the InputFormat.
     */
    public static <T, F extends FileInputFormat<T>> F openInput(
            Class<F> inputFormatClass, String path, Configuration configuration)
        throws IOException
    {
        final Configuration effectiveConfig = configuration == null ? new Configuration() : configuration;
        final Path normalizedPath = normalizePath(new Path(path));

        final F inputFormat = ReflectionUtil.newInstance(inputFormatClass);
        inputFormat.setFilePath(normalizedPath);
        inputFormat.setOpenTimeout(0);
        inputFormat.configure(effectiveConfig);

        final FileSystem fs = FileSystem.get(normalizedPath.toUri());
        final FileStatus fileStatus = fs.getFileStatus(normalizedPath);
        final long length = fileStatus.getLen();
        final BlockLocation[] blocks = fs.getFileBlockLocations(fileStatus, 0, length);

        // A file system may report no block locations at all (presumably for zero-length files). Indexing
        // blocks[0] blindly would then fail, so fall back to a split without any host information.
        final String[] hosts = (blocks == null || blocks.length == 0) ? new String[0] : blocks[0].getHosts();

        // The split keeps the path exactly as supplied by the caller; only the host list comes from the lookup.
        inputFormat.open(new FileInputSplit(0, new Path(path), 0, length, hosts));
        return inputFormat;
    }

    /**
     * Creates {@link InputFormat}s from a given class for the specified file(s). The optional {@link Configuration}
     * initializes the formats.
     * <p>
     * If {@code path} denotes a single file, the result contains exactly one format. If it denotes a directory, one
     * format is opened per entry listed for that directory. Should opening one of the entries fail, the formats that
     * were already opened by this call are closed again before the failure is propagated.
     *
     * @param <T>
     *        the element type of the format, i.e. the {@code T} in {@code FileInputFormat<T>}
     * @param <F>
     *        the concrete type of the InputFormat
     * @param inputFormatClass
     *        the class of the InputFormat
     * @param path
     *        the path of the file or to the directory containing the splits
     * @param configuration
     *        optional configuration of the InputFormat
     * @return the created {@link InputFormat}s for each file in the specified path, as a modifiable list
     * @throws IOException
     *         if an I/O error occurred while accessing the files or initializing the InputFormat.
     */
    public static <T, F extends FileInputFormat<T>> List<F> openAllInputs(
            Class<F> inputFormatClass, String path, Configuration configuration) throws IOException {
        final Path nephelePath = new Path(path);
        final FileSystem fs = nephelePath.getFileSystem();
        final FileStatus fileStatus = fs.getFileStatus(nephelePath);

        final List<F> formats = new ArrayList<F>();
        if (!fileStatus.isDir()) {
            formats.add(openInput(inputFormatClass, path, configuration));
            return formats;
        }

        boolean allOpened = false;
        try {
            for (FileStatus entry : fs.listStatus(nephelePath)) {
                formats.add(openInput(inputFormatClass, entry.getPath().toString(), configuration));
            }
            allOpened = true;
            return formats;
        } finally {
            if (!allOpened) {
                // Do not leak the formats opened so far when a later entry cannot be opened.
                closeQuietly(formats);
            }
        }
    }

    /**
     * Creates an {@link InputFormat} from a given class. The optional {@link Configuration}
     * initializes the format.
     * <p>
     * The format is asked to create its input splits (requesting 1) and is opened on the first split it returns.
     *
     * @param <T>
     *        the element type of the format, i.e. the {@code T} in {@code InputFormat<T, IS>}
     * @param <IS>
     *        the type of input split used by the format
     * @param <F>
     *        the concrete type of the InputFormat
     * @param inputFormatClass
     *        the class of the InputFormat
     * @param configuration
     *        optional configuration of the InputFormat; {@code null} is replaced by an empty configuration
     * @return the created {@link InputFormat}
     * @throws IOException
     *         if an I/O error occurred while initializing the InputFormat, or if the format did not create any
     *         input split to open.
     */
    public static <T, IS extends InputSplit, F extends InputFormat<T, IS>> F openInput(
            Class<F> inputFormatClass, Configuration configuration) throws IOException {
        final Configuration effectiveConfig = configuration == null ? new Configuration() : configuration;

        final F inputFormat = ReflectionUtil.newInstance(inputFormatClass);
        inputFormat.configure(effectiveConfig);

        final IS[] splits = inputFormat.createInputSplits(1);
        if (splits == null || splits.length == 0) {
            // Fail with a descriptive error instead of an index/null failure on splits[0].
            throw new IOException("The input format " + inputFormatClass.getName()
                + " did not create any input split.");
        }

        inputFormat.open(splits[0]);
        return inputFormat;
    }

    /**
     * Creates an {@link OutputFormat} from a given class for the specified file. The optional {@link Configuration}
     * initializes the format.
     * <p>
     * The format is set to {@link WriteMode#OVERWRITE} before it is configured and opened.
     *
     * @param <T>
     *        the element type bound of the format, i.e. the {@code T} in {@code FileOutputFormat<? extends T>}
     * @param <F>
     *        the concrete type of the OutputFormat
     * @param outputFormatClass
     *        the class of the OutputFormat
     * @param path
     *        the path of the file or to the directory containing the splits
     * @param configuration
     *        optional configuration of the OutputFormat; {@code null} is replaced by an empty configuration
     * @return the created {@link OutputFormat}
     * @throws IOException
     *         if an I/O error occurred while accessing the file or initializing the OutputFormat.
     */
    public static <T, F extends FileOutputFormat<? extends T>> F openOutput(
            Class<F> outputFormatClass, String path, Configuration configuration)
        throws IOException
    {
        final Configuration effectiveConfig = configuration == null ? new Configuration() : configuration;

        final F outputFormat = ReflectionUtil.newInstance(outputFormatClass);
        outputFormat.setOutputFilePath(new Path(path));
        outputFormat.setOpenTimeout(0);
        outputFormat.setWriteMode(WriteMode.OVERWRITE);
        outputFormat.configure(effectiveConfig);

        // NOTE(review): presumably "task 0 of 1 parallel tasks" - confirm against FileOutputFormat#open.
        outputFormat.open(0, 1);
        return outputFormat;
    }

    /**
     * Closes all given formats, ignoring any failure raised while closing.
     * <p>
     * Only used for cleanup after a failure: the exception that triggered the cleanup is the one the caller has to
     * see, so problems during closing must not replace it.
     */
    private static void closeQuietly(List<? extends FileInputFormat<?>> formats) {
        for (FileInputFormat<?> format : formats) {
            try {
                format.close();
            } catch (Exception ignored) {
                // best-effort cleanup, see method comment
            }
        }
    }

    /**
     * Fixes the path if it lacks a protocol prefix by rebuilding it with the {@code file} scheme. Paths that already
     * carry a scheme are returned unchanged.
     * <p>
     * NOTE(review): {@link URI#URI(String, String, String, String)} rejects a non-empty path that does not start with
     * {@code '/'} when a scheme is given, so a relative path presumably ends up as an
     * {@link IllegalArgumentException} here - confirm against what {@code Path.toUri()} returns for relative paths.
     *
     * @param path
     *        the path to normalize
     * @return the given path if it has a scheme, otherwise an equivalent path with the {@code file} scheme
     * @throws IllegalArgumentException
     *         if no valid {@code file} URI can be built from the path
     */
    private static Path normalizePath(Path path) {
        final URI uri = path.toUri();
        if (uri.getScheme() != null) {
            return path;
        }

        try {
            final URI fileUri = new URI("file", uri.getHost(), uri.getPath(), uri.getFragment());
            return new Path(fileUri.toString());
        } catch (URISyntaxException e) {
            throw new IllegalArgumentException("path is invalid", e);
        }
    }
}