Package eu.stratosphere.api.common.io

Source Code of eu.stratosphere.api.common.io.FormatUtil

/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/

package eu.stratosphere.api.common.io;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.core.fs.BlockLocation;
import eu.stratosphere.core.fs.FileInputSplit;
import eu.stratosphere.core.fs.FileStatus;
import eu.stratosphere.core.fs.FileSystem;
import eu.stratosphere.core.fs.FileSystem.WriteMode;
import eu.stratosphere.core.fs.Path;
import eu.stratosphere.core.io.InputSplit;
import eu.stratosphere.util.ReflectionUtil;

/**
* Provides convenience methods to deal with I/O operations related to {@link InputFormat} and {@link OutputFormat}.
*/
public class FormatUtil {


  /**
   * Creates an {@link InputFormat} from a given class for the specified file. The optional {@link Configuration}
   * initializes the format.
   *
   * @param <T>
   *        the class of the InputFormat
   * @param inputFormatClass
   *        the class of the InputFormat
   * @param path
   *        the path of the file
   * @param configuration
   *        optional configuration of the InputFormat
   * @return the created {@link InputFormat}
   * @throws IOException
   *         if an I/O error occurred while accessing the file or initializing the InputFormat.
   */
  public static <T, F extends FileInputFormat<T>> F openInput(
      Class<F> inputFormatClass, String path, Configuration configuration)
    throws IOException
  {
    configuration = configuration == null ? new Configuration() : configuration;

    Path normalizedPath = normalizePath(new Path(path));
    final F inputFormat = ReflectionUtil.newInstance(inputFormatClass);

    inputFormat.setFilePath(normalizedPath);
    inputFormat.setOpenTimeout(0);
    inputFormat.configure(configuration);

    final FileSystem fs = FileSystem.get(normalizedPath.toUri());
    FileStatus fileStatus = fs.getFileStatus(normalizedPath);

    BlockLocation[] blocks = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
    inputFormat.open(new FileInputSplit(0, new Path(path), 0, fileStatus.getLen(), blocks[0].getHosts()));
    return inputFormat;
  }

  /**
   * Creates {@link InputFormat}s from a given class for the specified file(s). The optional {@link Configuration}
   * initializes the formats.
   *
   * @param <T>
   *        the class of the InputFormat
   * @param inputFormatClass
   *        the class of the InputFormat
   * @param path
   *        the path of the file or to the directory containing the splits
   * @param configuration
   *        optional configuration of the InputFormat
   * @return the created {@link InputFormat}s for each file in the specified path
   * @throws IOException
   *         if an I/O error occurred while accessing the files or initializing the InputFormat.
   */
  @SuppressWarnings("unchecked")
  public static <T, F extends FileInputFormat<T>> List<F> openAllInputs(
      Class<F> inputFormatClass, String path, Configuration configuration) throws IOException {
    Path nephelePath = new Path(path);
    FileSystem fs = nephelePath.getFileSystem();
    FileStatus fileStatus = fs.getFileStatus(nephelePath);
    if (!fileStatus.isDir()) {
      return Arrays.asList(openInput(inputFormatClass, path, configuration));
    }
    FileStatus[] list = fs.listStatus(nephelePath);
    List<F> formats = new ArrayList<F>();
    for (int index = 0; index < list.length; index++) {
      formats.add(openInput(inputFormatClass, list[index].getPath().toString(), configuration));
    }
    return formats;
  }

  /**
   * Creates an {@link InputFormat} from a given class. The optional {@link Configuration}
   * initializes the format.
   *
   * @param <T>
   *        the class of the InputFormat
   * @param inputFormatClass
   *        the class of the InputFormat
   * @param configuration
   *        optional configuration of the InputFormat
   * @return the created {@link InputFormat}
   * @throws IOException
   *         if an I/O error occurred while accessing the file or initializing the InputFormat.
   */
  public static <T, IS extends InputSplit, F extends InputFormat<T, IS>> F openInput(
      Class<F> inputFormatClass, Configuration configuration) throws IOException {
    configuration = configuration == null ? new Configuration() : configuration;

    final F inputFormat = ReflectionUtil.newInstance(inputFormatClass);
    inputFormat.configure(configuration);
    final IS[] splits = inputFormat.createInputSplits(1);
    inputFormat.open(splits[0]);
    return inputFormat;
  }
 
  /**
   * Creates an {@link OutputFormat} from a given class for the specified file. The optional {@link Configuration}
   * initializes the format.
   *
   * @param <T>
   *        the class of the OutputFormat
   * @param outputFormatClass
   *        the class of the OutputFormat
   * @param path
   *        the path of the file or to the directory containing the splits
   * @param configuration
   *        optional configuration of the OutputFormat
   * @return the created {@link OutputFormat}
   * @throws IOException
   *         if an I/O error occurred while accessing the file or initializing the OutputFormat.
   */
  public static <T, F extends FileOutputFormat<? extends T>> F openOutput(
      Class<F> outputFormatClass, String path, Configuration configuration)
    throws IOException
  {
    final F outputFormat = ReflectionUtil.newInstance(outputFormatClass);
    outputFormat.setOutputFilePath(new Path(path));
    outputFormat.setOpenTimeout(0);
    outputFormat.setWriteMode(WriteMode.OVERWRITE);
 
    configuration = configuration == null ? new Configuration() : configuration;
   
    outputFormat.configure(configuration);
    outputFormat.open(0, 1);
    return outputFormat;
  }

  /**
   * Fixes the path if it denotes a local (relative) file without the proper protocol prefix.
   */
  private static Path normalizePath(Path path) {
    URI uri = path.toUri();
    if (uri.getScheme() == null) {
      try {
        uri = new URI("file", uri.getHost(), uri.getPath(), uri.getFragment());
        path = new Path(uri.toString());
      } catch (URISyntaxException e) {
        throw new IllegalArgumentException("path is invalid", e);
      }
    }
    return path;
  }
}
TOP

Related Classes of eu.stratosphere.api.common.io.FormatUtil

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.