Package org.apache.hadoop.raid

Source Code of org.apache.hadoop.raid.ParityFilePair

package org.apache.hadoop.raid;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.HarFileSystem;
import org.apache.hadoop.fs.Path;

public class ParityFilePair {

  final private Path path;
  final private FileStatus stat;
  final private FileSystem fs;

  private ParityFilePair(Path path, FileStatus stat, FileSystem fs) {
    this.path = path;
    this.stat = stat;
    this.fs = fs;
  }

  public Path getPath() {
    return this.path;
  }

  public FileStatus getFileStatus() {
    return this.stat;
  }

  public FileSystem getFileSystem() {
    return this.fs;
  }

  /**
   * Return whether if parity file of the source file exists or not
   *
   * @param src     The FileStatus of the source file.
   * @param codec   The Codec of the parity to check
   * @param conf
   * @return
   * @throws IOException
   */
  public static boolean parityExists(FileStatus src, Codec codec,
      Configuration conf) throws IOException {
    return ParityFilePair.getParityFile(codec, src.getPath(), conf) != null;
  }

  /**
   * Returns the Path to the parity file of a given file / directory
   *
   * @param codec The Codec of the parity
   * @param srcPath Path to the original source file
   * @return ParityFilePair representing the parity file of the source
   * @throws IOException
   */
  public static ParityFilePair getParityFile(Codec codec, Path srcPath,
      Configuration conf) throws IOException {

    FileSystem fsSrc = srcPath.getFileSystem(conf);

    if (codec.isDirRaid) {
      FileStatus stat = null;
      try {
        stat = fsSrc.getFileStatus(srcPath);
      } catch (java.io.FileNotFoundException e) {
        return null;
      }
      if (!stat.isDir()) {
        // directory raid needs a directory to get parity file
        srcPath = srcPath.getParent();
      }
    }
    Path srcParent = srcPath.getParent();
    //We assume that parity file and source file live in the same cluster
    FileSystem fsDest = fsSrc;
    Path destPathPrefix = fsDest.makeQualified(new Path(
        codec.parityDirectory));

    FileStatus srcStatus = null;
    try {
      srcStatus = fsSrc.getFileStatus(srcPath);
    } catch (java.io.FileNotFoundException e) {
      return null;
    }

    Path outDir = destPathPrefix;
    if (srcParent != null) {
      if (srcParent.getParent() == null) {
        outDir = destPathPrefix;
      } else {
        outDir = new Path(destPathPrefix, RaidNode.makeRelative(srcParent));
      }
    }

    //CASE 1: CHECK HAR - Must be checked first because har is created after
    // parity file and returning the parity file could result in error while
    // reading it.
    Path outPath =  RaidNode.getOriginalParityFile(destPathPrefix, srcPath);
    String harDirName = srcParent.getName() + RaidNode.HAR_SUFFIX;
    Path HarPath = new Path(outDir,harDirName);
    if (fsDest.exists(HarPath)) {
      URI HarPathUri = HarPath.toUri();
      Path inHarPath = new Path("har://",HarPathUri.getPath()+"/"+outPath.toUri().getPath());
      FileSystem fsHar = new HarFileSystem(fsDest);
      fsHar.initialize(inHarPath.toUri(), conf);
      FileStatus inHar = FileStatusCache.get(fsHar, inHarPath);
      if (inHar != null) {
        if (verifyParity(srcStatus, inHar, codec, conf)) {
          return new ParityFilePair(inHarPath, inHar, fsHar);
        }
      }
    }

    //CASE 2: CHECK PARITY
    try {
      FileStatus outHar = fsDest.getFileStatus(outPath);
      if (verifyParity(srcStatus, outHar, codec, conf)) {
        return new ParityFilePair(outPath, outHar, fsDest);
      }
    } catch (java.io.FileNotFoundException e) {
    }

    return null; // NULL if no parity file
  }

  static boolean verifyParity(FileStatus src, FileStatus parity,
      Codec codec, Configuration conf) throws IOException {
    if (parity.getModificationTime() != src.getModificationTime()) {
      return false;
    }
    int stripeLength = codec.stripeLength;
    int parityLegnth = codec.parityLength;
    long expectedSize = 0;
    if (codec.isDirRaid) {
      FileSystem srcFs = src.getPath().getFileSystem(conf);
      List<FileStatus> lfs = RaidNode.listDirectoryRaidFileStatus(conf, srcFs,
          src.getPath());
      if (lfs == null) {
        return false;
      }

      long blockNum = DirectoryStripeReader.getBlockNum(lfs);
      long parityBlockSize = DirectoryStripeReader.getParityBlockSize(conf,
          lfs);
      int parityBlocks = (int)Math.ceil(
          ((double)blockNum) / stripeLength) * parityLegnth;
      expectedSize = parityBlocks * parityBlockSize;
    } else {
      double sourceBlocks = Math.ceil(
          ((double)src.getLen()) / src.getBlockSize());
      int parityBlocks = (int)Math.ceil(
          sourceBlocks / stripeLength) * parityLegnth;
      expectedSize = parityBlocks * src.getBlockSize();
    }
    if (parity.getLen() != expectedSize) {
      RaidNode.LOG.error("Bad parity file:" + parity.getPath() +
          " File size doen't match. parity:" + parity.getLen() +
          " expected:" + expectedSize);
      return false;
    }
    return true;
  }

  /**
   * Caches the listStatus result in a thread local cache. This greatly
   * increases the speed of repeatedly calling getParityFile() from
   * the same thread.
   */
  static class FileStatusCache {

    private static final long CACHE_STALE_PERIOD = 60 * 1000L;

    // The threadlocal stores a separate cache for each thread
    private static ThreadLocal<Map<Path, FileStatusWithTime>> tLocalCaches =
        new ThreadLocal<Map<Path, FileStatusWithTime>>();

    /**
     * Do getFileStatus with caching
     * @param fs FileSystem
     * @param file The path to do getFileStatus
     * @return FileStatus of the file
     * @throws IOException
     */
    static FileStatus get(FileSystem fs, Path file) throws IOException {

      long now = RaidNode.now();
      Map<Path, FileStatusWithTime> cache = tLocalCaches.get();

      // If the current thread do not have a cache, create one.
      if (cache == null || !useCache) {
        cache = new HashMap<Path, FileStatusWithTime>();
        tLocalCaches.set(cache);
      }
      FileStatusWithTime fileStatusWithTime = cache.get(file);
      if (fileStatusWithTime != null) {
        if (now - fileStatusWithTime.time > CACHE_STALE_PERIOD) {
          cache.remove(file);
        } else {
          return fileStatusWithTime.fileStatus;
        }
      }
      Path parent = file.getParent();
      // We cache the FileStatus in one directory. When move to a different
      // directory, the cache is cleared.
      cache.clear();
      FileStatus[] files = null;
      try {
        files = fs.listStatus(parent);
      } catch (FileNotFoundException e) {
      }
      if (files == null) {
        return null;
      }
      for (FileStatus status : files) {
        cache.put(status.getPath(), new FileStatusWithTime(status, now));
      }
      if (cache.containsKey(file)) {
        return cache.get(file).fileStatus;
      } else {
        return null;
      }
    }

  }

  private static class FileStatusWithTime {
    final FileStatus fileStatus;
    final long time;
    FileStatusWithTime(FileStatus fileStatus, long time) {
      this.fileStatus = fileStatus;
      this.time = time;
    }
  }

  private static boolean useCache = true;

  /**
   * Disable the cache of FileStatus
   * SHOULD NOT BE USED BY ANY PRODUCTION CODE
   */
  public static void disableCacheUsedInTestOnly() {
    useCache = false;
  }

}
TOP

Related Classes of org.apache.hadoop.raid.ParityFilePair

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.