Package org.apache.hadoop.hdfs.server.datanode

Source Code of org.apache.hadoop.hdfs.server.datanode.DataStorage

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hdfs.server.datanode;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileLock;
import java.util.Collection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.HardLink;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.hdfs.server.common.HdfsConstants.NodeType;
import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption;
import org.apache.hadoop.hdfs.server.common.Storage.StorageState;
import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.util.Daemon;
import org.apache.hadoop.util.DiskChecker;
import org.apache.hadoop.io.IOUtils;

/**
* Data storage information file.
* <p>
* @see Storage
*/
public class DataStorage extends Storage {
  // Constants
  final static String BLOCK_SUBDIR_PREFIX = "subdir";
  final static String BLOCK_FILE_PREFIX = "blk_";
  final static String COPY_FILE_PREFIX = "dncp_";
  final static String STORAGE_DIR_DETACHED = "detach";
  public final static String STORAGE_DIR_TMP = "tmp";

  // Property key under which the storage ID is written by setFields() and
  // read back by getFields().
  private final static String STORAGE_ID = "storageID";

  // Storage ID of this data node. createStorageID() treats null and the
  // empty string as "not assigned yet".
  private String storageID;

  // Flag to ensure that initializing the storage occurs only once.
  private boolean initialized = false;

  // Map of <namespace id, NameSpaceSliceStorage>.
  private Map<Integer, NameSpaceSliceStorage> nsStorageMap
    = new HashMap<Integer, NameSpaceSliceStorage>();

  // Data node passed in at construction; doRollback() hands it to the
  // scratch DataStorage it creates for reading the previous state.
  private final DataNode datanode;

  // Map of top level directory to layout version.
  Map<File, Integer> layoutMap = new HashMap<File, Integer>();

  DataStorage(DataNode datanode) {
    super(NodeType.DATA_NODE);
    storageID = "";
    this.datanode = datanode;
  }
 
  public DataStorage(StorageInfo storageInfo, String strgID, DataNode datanode) {
    super(NodeType.DATA_NODE, storageInfo);
    this.storageID = strgID;
    this.datanode = datanode;
  }

  public NameSpaceSliceStorage getNStorage(int namespaceId) {
    return nsStorageMap.get(namespaceId);
  }
 
  public String getStorageID() {
    return storageID;
  }
 
  void setStorageID(String newStorageID) {
    this.storageID = newStorageID;
  }

  synchronized void createStorageID(int datanodePort) {
    if (storageID != null && !storageID.isEmpty()) {
      return;
    }
    storageID = DataNode.createNewStorageId(datanodePort);
  }
 
  ArrayList<StorageDirectory> analyzeStorageDirs(NamespaceInfo nsInfo,
          Collection<File> dataDirs,
          StartupOption startOpt
          ) throws IOException {
   
    if (storageID == null)
      this.storageID = "";

    if (storageDirs == null) {
      this.storageDirs = new ArrayList<StorageDirectory>(dataDirs.size());
    } else {
      ((ArrayList<StorageDirectory>) storageDirs)
          .ensureCapacity(storageDirs.size() + dataDirs.size());
    }

    ArrayList<StorageDirectory> newDirs = new ArrayList<StorageDirectory>(
        dataDirs.size());
    ArrayList<StorageState> dataDirStates = new ArrayList<StorageState>(dataDirs.size());
    for(Iterator<File> it = dataDirs.iterator(); it.hasNext();) {
      File dataDir = it.next();
      StorageDirectory sd = new StorageDirectory(dataDir);
      StorageState curState;
      try {
        curState = sd.analyzeStorage(startOpt);
        // sd is locked but not opened
        switch(curState) {
          case NORMAL:
            break;
          case NON_EXISTENT:
            // ignore this storage
            LOG.info("Storage directory " + dataDir + " does not exist.");
            it.remove();
            continue;
          case NOT_FORMATTED: // format
            LOG.info("Storage directory " + dataDir + " is not formatted.");
            if (!sd.isEmpty()) {
              LOG.error("Storage directory " + dataDir
                + " is not empty, and will not be formatted! Exiting.");
              throw new IOException(
                "Storage directory " + dataDir + " is not empty!");
            }
            LOG.info("Formatting ...");
            format(sd, nsInfo);
            break;
          default// recovery part is common
            sd.doRecover(curState);
        }
      } catch (IOException ioe) {
        try {
          sd.unlock();
        }
        catch (IOException e) {
          LOG.warn("Exception when unlocking storage directory", e);
        }
        LOG.warn("Ignoring storage directory " + dataDir, ioe);
        //continue with other good dirs
        continue;
      }
      // add to the storage list
      addStorageDir(sd);
      newDirs.add(sd);
      dataDirStates.add(curState);
    }
   
    if (dataDirs.size() == 0// none of the data dirs exist
        throw new IOException(
                          "All specified directories are not accessible or do not exist.");
   
    return newDirs;
  }
 
  /**
   * Analyze storage directories.
   * Recover from previous transitions if required.
   * Perform fs state transition if necessary depending on the namespace info.
   * Read storage info.
   *
   * @param nsInfo namespace information
   * @param dataDirs array of data storage directories
   * @param startOpt startup option
   * @throws IOException
   */
  synchronized void recoverTransitionRead(DataNode datanode,
                             NamespaceInfo nsInfo,
                             Collection<File> dataDirs,
                             StartupOption startOpt
                             ) throws IOException {
    if (initialized) {
      // DN storage has been initialized, no need to do anything
      return;
    }

    assert FSConstants.LAYOUT_VERSION == nsInfo.getLayoutVersion() :
      "Data-node and name-node layout versions must be the same.";
   
    // 1. For each data directory calculate its state and
    // check whether all is consistent before transitioning.
    // Format and recover.
    analyzeStorageDirs(nsInfo, dataDirs, startOpt);

    // 2. Do transitions
    // Each storage directory is treated individually.
    // During startup some of them can upgrade or rollback
    // while others could be uptodate for the regular startup.
    doTransition(storageDirs, nsInfo, startOpt);

    // 3. make sure we have storage id set - if not - generate new one
    createStorageID(datanode.getPort());

    // 4. Update all storages. Some of them might have just been formatted.
    this.writeAll();

    this.initialized = true;
  }
 
  /**
   * merge the data directory from srcDataDirs to dstDataDirs
   * @return true if merge succeeds; false if no merge happens
   */
 
  boolean doMerge(String[] srcDataDirs, Collection<File> dstDataDirs,
      int namespaceId, NamespaceInfo nsInfo, StartupOption startOpt)
    throws IOException {
    HashMap<File, File> dirsToMerge = new HashMap<File, File>();
    int i = 0;
    for (Iterator<File> it = dstDataDirs.iterator(); it.hasNext(); i++) {
      File dstDataDir = it.next();
      if (dstDataDir.exists()) {
        continue;
      }
      File srcDataDir = NameSpaceSliceStorage.getNsRoot(
          namespaceId, new File(srcDataDirs[i], STORAGE_DIR_CURRENT));
      if (!srcDataDir.exists() || !srcDataDir.isDirectory()) {
        LOG.info("Source data directory " +
            srcDataDir + " doesn't exist.");
        continue;
      }
      dirsToMerge.put(srcDataDir, dstDataDir);
    }
    if (dirsToMerge.size() == 0)
      //No merge is needed
      return false;
   
    if (dirsToMerge.size() != dstDataDirs.size()) {
      // Last merge succeeds partially
      throw new IOException("Merge fail: not all directories are merged successfully.");
    }
   
    MergeThread[] mergeThreads = new MergeThread[dirsToMerge.size()];
    // start to merge
    i = 0;
    for (Map.Entry<File, File> entry: dirsToMerge.entrySet()) {
      MergeThread thread = new MergeThread(entry.getKey(), entry.getValue(), nsInfo);
      thread.start();
      mergeThreads[i] = thread;
      i++;
    }
    // wait for merge to be done
    for (MergeThread thread : mergeThreads) {
      try {
        thread.join();
      } catch (InterruptedException e) {
        throw (InterruptedIOException)new InterruptedIOException().initCause(e);
      }
    }
    // check for errors
    for (MergeThread thread : mergeThreads) {
      if (thread.error != null)
        throw new IOException(thread.error);
    }
    return true;
  }

  /**
   * recoverTransitionRead for a specific Name Space
   *
   * @param datanode DataNode
   * @param namespaceId name space Id
   * @param nsInfo Namespace info of namenode corresponding to the Name Space
   * @param dataDirs Storage directories
   * @param startOpt startup option
   * @throws IOException on error
   */
  void recoverTransitionRead(DataNode datanode, int namespaceId, NamespaceInfo nsInfo,
      Collection<File> dataDirs, StartupOption startOpt, String nameserviceId) throws IOException {
    // First ensure datanode level format/snapshot/rollback is completed
    // recoverTransitionRead(datanode, nsInfo, dataDirs, startOpt);
   
    // Create list of storage directories for the Name Space
    Collection<File> nsDataDirs = new ArrayList<File>();
    for(Iterator<File> it = dataDirs.iterator(); it.hasNext();) {
      File dnRoot = it.next();
      File nsRoot = NameSpaceSliceStorage.getNsRoot(
          namespaceId, new File(dnRoot, STORAGE_DIR_CURRENT));
      nsDataDirs.add(nsRoot);
    }
    boolean merged = false;
    String[] mergeDataDirs = nameserviceId == null? null:
      datanode.getConf().getStrings("dfs.merge.data.dir." + nameserviceId);
    if (startOpt.equals(StartupOption.REGULAR) && mergeDataDirs != null
        && mergeDataDirs.length > 0) {
      assert mergeDataDirs.length == dataDirs.size();
      merged = doMerge(mergeDataDirs, nsDataDirs, namespaceId, nsInfo, startOpt);
    }
    if (!merged) {
      // mkdir for the list of NameSpaceStorage
      makeNameSpaceDataDir(nsDataDirs);
    }
    NameSpaceSliceStorage nsStorage = new NameSpaceSliceStorage(
        namespaceId, this.getCTime(), layoutMap);
   
    nsStorage.recoverTransitionRead(datanode, nsInfo, nsDataDirs, startOpt);
    addNameSpaceStorage(namespaceId, nsStorage);
  }

  /**
   * Create physical directory for Name Spaces on the data node
   *
   * @param dataDirs
   *          List of data directories
   * @throws IOException on errors
   */
  public static void makeNameSpaceDataDir(Collection<File> dataDirs) throws IOException {
    for (File data : dataDirs) {
      try {
        DiskChecker.checkDir(data);
      } catch ( IOException e ) {
        LOG.warn("Invalid directory in: " + data.getCanonicalPath() + ": "
            + e.getMessage());
      }
    }
  }

  synchronized Collection<StorageDirectory> recoverTransitionAdditionalRead(NamespaceInfo nsInfo,
          Collection<File> dataDirs,
          StartupOption startOpt
          ) throws IOException{
    assert FSConstants.LAYOUT_VERSION == nsInfo.getLayoutVersion() :
        "Data-node and name-node layout versions must be the same.";
   
    // 1. For each data directory calculate its state and
    // check whether all is consistent before transitioning.
    // Format and recover.
    ArrayList<StorageDirectory> newDirs = analyzeStorageDirs(nsInfo, dataDirs, startOpt);

    // 2. Do transitions
    // Each storage directory is treated individually.
    // During startup some of them can upgrade or rollback
    // while others could be uptodate for the regular startup.
    doTransition(newDirs, nsInfo, startOpt);
    assert this.getLayoutVersion() == nsInfo.getLayoutVersion() :
        "Data-node and name-node layout versions must be the same.";
    assert this.getCTime() == nsInfo.getCTime() :
        "Data-node and name-node CTimes must be the same.";

    // 3. Update all storages. Some of them might have just been formatted.
    if (this.layoutVersion == 0) {
      layoutVersion = FSConstants.LAYOUT_VERSION;
    }
    for (StorageDirectory sd : newDirs) {
      sd.write();
    }

    return newDirs;
  }

  void format(StorageDirectory sd, NamespaceInfo nsInfo) throws IOException {
    sd.clearDirectory(); // create directory
    this.layoutVersion = FSConstants.LAYOUT_VERSION;
    this.namespaceID = nsInfo.getNamespaceID()// mother namespaceid
    this.cTime = 0;
    // store storageID as it currently is
    sd.write();
  }

  protected void setFields(Properties props,
                           StorageDirectory sd
                           ) throws IOException {
    props.setProperty(STORAGE_TYPE, storageType.toString());
    props.setProperty(LAYOUT_VERSION, String.valueOf(layoutVersion));
    props.setProperty(STORAGE_ID, getStorageID());
    // Set NamespaceID in version before federation
    if (layoutVersion > FSConstants.FEDERATION_VERSION) {
      props.setProperty(NAMESPACE_ID, String.valueOf(namespaceID));
      props.setProperty(CHECK_TIME, String.valueOf(cTime));
    }
  }

  protected void getFields(Properties props,
                           StorageDirectory sd
                           ) throws IOException {
    setLayoutVersion(props, sd);
    setStorageType(props, sd);

    // Read NamespaceID in version before federation
    if (layoutVersion > FSConstants.FEDERATION_VERSION) {
      setNamespaceID(props, sd);
      setcTime(props, sd);
    }

    String ssid = props.getProperty(STORAGE_ID);
    if (ssid == null ||
        !("".equals(storageID) || "".equals(ssid) ||
          storageID.equals(ssid)))
      throw new InconsistentFSStateException(sd.getRoot(),
          "has incompatible storage Id.");
    if ("".equals(storageID)) // update id only if it was empty
      storageID = ssid;
  }

  public boolean isConversionNeeded(StorageDirectory sd) throws IOException {
    File oldF = new File(sd.getRoot(), "storage");
    if (!oldF.exists())
      return false;
    // check the layout version inside the storage file
    // Lock and Read old storage file
    RandomAccessFile oldFile = new RandomAccessFile(oldF, "rws");
    FileLock oldLock = oldFile.getChannel().tryLock();
    try {
      oldFile.seek(0);
      int oldVersion = oldFile.readInt();
      if (oldVersion < LAST_PRE_UPGRADE_LAYOUT_VERSION)
        return false;
    } finally {
      oldLock.release();
      oldFile.close();
    }
    return true;
  }

  private boolean isNsLevelUpgraded(int namespaceId, StorageDirectory sd) {
    File nsRoot = NameSpaceSliceStorage.getNsRoot(namespaceId, sd.getCurrentDir());
    return new File(nsRoot, STORAGE_DIR_PREVIOUS).exists();
  }

  /**
   * Analyze which and whether a transition of the fs state is required and
   * perform it if necessary.
   *
   * Rollback if (previousLV >= LAYOUT_VERSION && previousLV >
   * FEDERATION_VERSION)
   * Upgrade if this.LV > LAYOUT_VERSION && this.LV > FEDERATION_VERSION
   * Regular startup if this.LV = LAYOUT_VERSION && this.cTime = namenode.cTime
   *
   * @param nsInfo
   *          namespace info
   * @param startOpt
   *          startup option
   * @throws IOException
   */
  private void doTransition(List<StorageDirectory> sds,
                             NamespaceInfo nsInfo,
                             StartupOption startOpt
                             ) throws IOException {
    if (startOpt == StartupOption.ROLLBACK)
      doRollback(nsInfo); // rollback if applicable

    int numOfDirs = sds.size();
    List<StorageDirectory> dirsToUpgrade = new ArrayList<StorageDirectory>(numOfDirs);
    List<StorageInfo> dirsInfo = new ArrayList<StorageInfo>(numOfDirs);
    for (StorageDirectory sd : sds) {
      sd.read();
      layoutMap.put(sd.getRoot(), this.layoutVersion);
      checkVersionUpgradable(this.layoutVersion);
      assert this.layoutVersion >= FSConstants.LAYOUT_VERSION :
        "Future version is not allowed";
     
      boolean federationSupported =
        this.layoutVersion <= FSConstants.FEDERATION_VERSION;
      // For pre-federation version - validate the namespaceID
      if (!federationSupported &&
          getNamespaceID() != nsInfo.getNamespaceID()) {
        sd.unlock();
        throw new IOException(
            "Incompatible namespaceIDs in " + sd.getRoot().getCanonicalPath()
            + ": namenode namespaceID = " + nsInfo.getNamespaceID()
            + "; datanode namespaceID = " + getNamespaceID());
      }
      if (this.layoutVersion == FSConstants.LAYOUT_VERSION
          && this.cTime == nsInfo.getCTime())
        continue; // regular startup
      // verify necessity of a distributed upgrade
      verifyDistributedUpgradeProgress(nsInfo);
      // do a global upgrade iff layout version changes and current layout is
      // older than FEDERATION.
      if (this.layoutVersion > FSConstants.LAYOUT_VERSION
          && this.layoutVersion > FSConstants.FEDERATION_VERSION) {
        if (isNsLevelUpgraded(getNamespaceID(), sd)) {
          throw new IOException("Ns level directory already upgraded for : " +
              sd.getRoot() + " ignoring upgrade");
        }
        dirsToUpgrade.add(sd)// upgrade
        dirsInfo.add(new StorageInfo(this));
        continue;
      }
      if (this.cTime >= nsInfo.getCTime()) {
        // layoutVersion == LAYOUT_VERSION && this.cTime > nsInfo.cTime
        // must shutdown
        sd.unlock();
        throw new IOException("Datanode state: LV = " + this.getLayoutVersion()
            + " CTime = " + this.getCTime()
            + " is newer than the namespace state: LV = "
            + nsInfo.getLayoutVersion()
            + " CTime = " + nsInfo.getCTime());
      }
    }
   
    // Now do upgrade if dirsToUpgrade is not empty
    if (!dirsToUpgrade.isEmpty()) {
      doUpgrade(dirsToUpgrade, dirsInfo, nsInfo);
    }
  }
 
  /**
   * A thread that merges a data storage directory from
   * srcDataDir to dstDataDir
   */
  static class MergeThread extends Thread {
    private File srcNSDir;
    private File dstNSDir;
    private NamespaceInfo nsInfo;
    volatile Throwable error = null;
    private static final String STORAGE_DIR_MERGE_TMP = "merge.tmp";
   
    MergeThread(File srcNSDir, File dstNSDir, NamespaceInfo nsInfo) {
      this.srcNSDir = srcNSDir;
      this.dstNSDir = dstNSDir;
      this.nsInfo = nsInfo;
      this.setName("Merging " + srcNSDir + " to " + dstNSDir);
    }
   
    /* check if the directory is merged */
    private boolean isMerged() {
      return dstNSDir.exists();
    }
   
    public void run() {
      try {
        if (isMerged()) {
          return;
        }
       
        LOG.info("Merging storage directory " + srcNSDir +
            " to " + dstNSDir);
       
        File mergeTmpDir = new File(dstNSDir.getParent(), STORAGE_DIR_MERGE_TMP);
        NameSpaceSliceStorage nsStorage = new NameSpaceSliceStorage(
            nsInfo.getNamespaceID(), nsInfo.getCTime());
        nsStorage.format(mergeTmpDir, nsInfo);
       
        assert srcNSDir.exists() : "Source directory must exist.";
        File mergeTmpNSDir = nsStorage.getNsRoot(mergeTmpDir);
        File srcCurNsDir = new File(srcNSDir, STORAGE_DIR_CURRENT);
        File mergeTmpCurNSDir = new File(mergeTmpNSDir, STORAGE_DIR_CURRENT);
        // hardlink all blocks 
        HardLink hardLink = new HardLink();
        linkBlocks(new File(srcCurNsDir, STORAGE_DIR_FINALIZED),
            new File(mergeTmpCurNSDir, STORAGE_DIR_FINALIZED),
            nsInfo.getLayoutVersion(), hardLink, true);
        linkBlocks(new File(srcCurNsDir, STORAGE_DIR_RBW),
            new File(mergeTmpCurNSDir, STORAGE_DIR_RBW),
            nsInfo.getLayoutVersion(), hardLink, true);
       
        // finally rename the tmp dir to dst dir
        if (!mergeTmpNSDir.renameTo(dstNSDir)) {
          throw new IOException("Cannot rename tmp directory " + mergeTmpNSDir +
              " to dst directory " + dstNSDir);
        }
      } catch (Throwable t) {
        error = t;
      }
    }
  }

  /**
   * A thread that upgrades a data storage directory
   */
  static class UpgradeThread extends Thread {
    private StorageDirectory sd;
    private StorageInfo si;
    private NamespaceInfo nsInfo;
    volatile Throwable error = null;
    private File topCurDir;
    private File[] namespaceDirs;
   
    UpgradeThread(StorageDirectory sd, StorageInfo si, NamespaceInfo nsInfo) {
      this.sd = sd;
      this.si = si;
      this.nsInfo = nsInfo;
      this.topCurDir = sd.getCurrentDir();
      this.namespaceDirs = topCurDir.listFiles(new FilenameFilter() {
        @Override
        public boolean accept(File dir, String file) {
          return file.startsWith(NameSpaceSliceStorage.NS_DIR_PREFIX);
        }
      });
      this.setName("Upgrading " + sd.getRoot());
    }
   
    /** check if any of the namespace directory has a snapshot */
    private boolean isNamespaceUpgraded() {
      for (File namespaceDir : namespaceDirs) {
        if (new File(namespaceDir, STORAGE_DIR_PREVIOUS).exists()) {
          return true;
        }
      }
      return false;
    }
   
    public void run() {
      try {
        if (isNamespaceUpgraded()) {
          /// disallow coexistence of global and per namespace snapshots
          throw new IOException(
              "Local snapshot exists. Please either finalize or rollback first!");
        }

        LOG.info("Upgrading storage directory " + sd.getRoot()
            + ".\n   old LV = " + si.getLayoutVersion()
            + "; old CTime = " + si.getCTime()
            + ".\n   new LV = " + nsInfo.getLayoutVersion()
            + "; new CTime = " + nsInfo.getCTime());
        File curDir = sd.getCurrentDir();
        File prevDir = sd.getPreviousDir();
        // remove prev dir if it exists
        if (prevDir.exists()) {
          deleteDir(prevDir);
        }
        assert curDir.exists() : "Current directory must exist.";
        File tmpDir = sd.getPreviousTmp();
        assert !tmpDir.exists() : "previous.tmp directory must not exist.";
        // rename current to tmp
        rename(curDir, tmpDir);
       
        // hardlink blocks
        upgrade(si.getLayoutVersion(), nsInfo.getLayoutVersion(),
            tmpDir, curDir);
      } catch (Throwable t) {
        error = t;
      }
    }
   
    private void upgrade(int oldLayoutVersion, int curLayoutVersion,
        File tmpDir, File curDir) throws IOException {
      HardLink hardLink = new HardLink();
      if (oldLayoutVersion <= FSConstants.FEDERATION_VERSION) {
        // upgrade from a federation version to a newer federation version
        // link top directory
        linkBlocks(new File(tmpDir, STORAGE_DIR_FINALIZED),
            new File(curDir, STORAGE_DIR_FINALIZED), curLayoutVersion,
            hardLink, true);
        linkBlocks(new File(tmpDir, STORAGE_DIR_RBW),
            new File(curDir, STORAGE_DIR_RBW), curLayoutVersion,
            hardLink, true);
        // link all namespace directories
        for (File namespaceDir : namespaceDirs) {
          File tmpNamespaceCurDir = new File(
              new File(tmpDir, namespaceDir.getName()), STORAGE_DIR_CURRENT);
          File namespaceDirCur = new File(namespaceDir, STORAGE_DIR_CURRENT);
          linkBlocks(new File(tmpNamespaceCurDir, STORAGE_DIR_FINALIZED),
              new File(namespaceDirCur, STORAGE_DIR_FINALIZED),
              curLayoutVersion, hardLink, true);
          linkBlocks(new File(tmpNamespaceCurDir, STORAGE_DIR_RBW),
              new File(namespaceDirCur, STORAGE_DIR_RBW),
              curLayoutVersion, hardLink, true);
          //link Version file
          linkBlocks(new File(tmpNamespaceCurDir, STORAGE_FILE_VERSION),
              new File(namespaceDirCur, STORAGE_FILE_VERSION),
              curLayoutVersion, hardLink, true);
        }
      } else if (oldLayoutVersion <= FSConstants.RBW_LAYOUT_VERSION) {
        // upgrade from RBW layout version to Federation.
        // This is the directory data/current/NS-/
        File curNsDir = NameSpaceSliceStorage.getNsRoot(
            nsInfo.getNamespaceID(), curDir);
        NameSpaceSliceStorage nsStorage = new NameSpaceSliceStorage(
            nsInfo.getNamespaceID(), nsInfo.getCTime());
        nsStorage.format(curDir, nsInfo);

        // Move all blocks to this namespace directory
        // This is the directory data/current/NS-/current.
        File nsCurDir = new File(curNsDir, STORAGE_DIR_CURRENT);
        File curNsDirFinalized = new File(nsCurDir, STORAGE_DIR_FINALIZED);
        File curNsDirRbw = new File(nsCurDir, STORAGE_DIR_RBW);
        linkBlocks(new File(tmpDir, STORAGE_DIR_FINALIZED), curNsDirFinalized,
            curLayoutVersion, hardLink, false);
        linkBlocks(new File(tmpDir, STORAGE_DIR_RBW), curNsDirRbw,
            curLayoutVersion, hardLink, false);
      } else {
        // upgrade pre-rbw version to federation version
        // create the directory for the namespace
        File curNsDir = NameSpaceSliceStorage.getNsRoot(
            nsInfo.getNamespaceID(), curDir);
        NameSpaceSliceStorage nsStorage = new NameSpaceSliceStorage(
            nsInfo.getNamespaceID(), nsInfo.getCTime());
        nsStorage.format(curDir, nsInfo);

        // Move all blocks to this namespace directory
        File nsCurDir = new File(curNsDir, STORAGE_DIR_CURRENT);
        // Move finalized blocks
        File nsDirFinalized = new File(nsCurDir, STORAGE_DIR_FINALIZED);
        linkBlocks(tmpDir, nsDirFinalized, curLayoutVersion, hardLink, true);
        // Move rbw blocks
        File nsDirRbw = new File(nsCurDir, STORAGE_DIR_RBW);
        File oldDirRbw = new File(tmpDir.getParentFile(), OLD_STORAGE_DIR_RBW);
        linkBlocks(oldDirRbw, nsDirRbw, curLayoutVersion, hardLink, true);
      }
      LOG.info("Completed upgrading storage directory " + sd.getRoot() +
          " " + hardLink.linkStats.report());
    }
  }
   
  /**
   * Move current storage into a backup directory,
   * and hardlink all its blocks into the new current directory.
   */
  private void doUpgrade(List<StorageDirectory> sds,
                 List<StorageInfo> sdsInfo,
                 final NamespaceInfo nsInfo
                 ) throws IOException {
    assert sds.size() == sdsInfo.size();
    UpgradeThread[] upgradeThreads = new UpgradeThread[sds.size()];
    // start to upgrade
    for (int i=0; i<upgradeThreads.length; i++) {
      final StorageDirectory sd = sds.get(i);
      final StorageInfo si = sdsInfo.get(i);
      UpgradeThread thread = new UpgradeThread(sd, si, nsInfo);
      thread.start();
      upgradeThreads[i] = thread;
    }
    // wait for upgrade to be done
    for (UpgradeThread thread : upgradeThreads) {
      try {
        thread.join();
      } catch (InterruptedException e) {
        throw (InterruptedIOException)new InterruptedIOException().initCause(e);
      }
    }
    // check for errors
    for (UpgradeThread thread : upgradeThreads) {
      if (thread.error != null)
        throw new IOException(thread.error);
    }

    // write version file
    this.layoutVersion = FSConstants.LAYOUT_VERSION;
    assert this.namespaceID == nsInfo.getNamespaceID() :
      "Data-node and name-node layout versions must be the same.";
    this.cTime = nsInfo.getCTime();
    for (StorageDirectory sd :sds) {
      sd.write();
      File prevDir = sd.getPreviousDir();
      File tmpDir = sd.getPreviousTmp();
      // rename tmp to previous
      rename(tmpDir, prevDir);
      LOG.info("Upgrade of " + sd.getRoot()+ " is complete.");
    }
  }

  private void doRollback(NamespaceInfo nsInfo) throws IOException {
    int numDirs = getNumStorageDirs();
    RollbackThread[] rollbackThreads = new RollbackThread[numDirs];
    // start to rollback
    for (int i=0; i<numDirs; i++) {
      final StorageDirectory sd = this.getStorageDir(i);
      RollbackThread thread = new RollbackThread(sd, nsInfo, new DataStorage(
          datanode));
      thread.start();
      rollbackThreads[i] = thread;
    }
    // wait for rollback to be done
    for (RollbackThread thread : rollbackThreads) {
      try {
        thread.join();
      } catch (InterruptedException e) {
        return;
      }
    }
    // check for errors
    for (RollbackThread thread : rollbackThreads) {
      if (thread.error != null)
        throw new IOException(thread.error);
    }
  }

  static class RollbackThread extends Thread {
    private StorageDirectory sd;
    private NamespaceInfo nsInfo;
    volatile Throwable error;
    private Storage prevInfo;
   
    RollbackThread(StorageDirectory sd, NamespaceInfo nsInfo,
        Storage prevInfo) {
      this.sd = sd;
      this.nsInfo = nsInfo;
      this.setName("Rolling back " + sd.getRoot());
      this.prevInfo = prevInfo;
    }

    private boolean canRollBack(boolean globalRollback) {
      if (globalRollback) {
        return (prevInfo.getLayoutVersion() >= FSConstants.LAYOUT_VERSION);
      } else {
        return ((prevInfo.getLayoutVersion() >= FSConstants.LAYOUT_VERSION
            || prevInfo.getCTime() <= nsInfo.getCTime()));
      }
    }
   
    public void run() {
      try {
        File prevDir = sd.getPreviousDir();
        // regular startup if previous dir does not exist
        if (!prevDir.exists()) {
          return;
        }
        StorageDirectory prevSD = prevInfo.new StorageDirectory(sd.getRoot());
        prevSD.read(prevSD.getPreviousVersionFile());

        boolean globalRollback = prevInfo instanceof DataStorage;
        if (!canRollBack(globalRollback))
          throw new InconsistentFSStateException(prevSD.getRoot(),
              "Cannot rollback to a newer state.\nDatanode previous state: LV = "
              + prevInfo.getLayoutVersion() + " CTime = " + prevInfo.getCTime()
              + " is newer than the namespace state: LV = "
              + nsInfo.getLayoutVersion() + " CTime = " + nsInfo.getCTime());
        LOG.info("Rolling back storage directory " + sd.getRoot()
            + ".\n   target LV = " + nsInfo.getLayoutVersion()
            + "; target CTime = " + nsInfo.getCTime());
        File tmpDir = sd.getRemovedTmp();
        assert !tmpDir.exists() : "removed.tmp directory must not exist.";
        // rename current to tmp
        File curDir = sd.getCurrentDir();
        assert curDir.exists() : "Current directory must exist.";
        rename(curDir, tmpDir);
        // rename previous to current
        rename(prevDir, curDir);
        // delete tmp dir
        deleteDir(tmpDir);
        LOG.info("Rollback of " + sd.getRoot() + " is complete.");
      } catch (Throwable t) {
        error = t;
      }
    }
  }
 
  void doFinalize(StorageDirectory sd) throws IOException {
    File prevDir = sd.getPreviousDir();
    if (!prevDir.exists())
      return; // already discarded
    final String dataDirPath = sd.getRoot().getCanonicalPath();
    LOG.info("Finalizing upgrade for storage directory "
             + dataDirPath
             + ".\n   cur LV = " + this.getLayoutVersion()
             + "; cur CTime = " + this.getCTime());
    assert sd.getCurrentDir().exists() : "Current directory must exist.";
    final File tmpDir = sd.getFinalizedTmp();
    // rename previous to tmp
    rename(prevDir, tmpDir);

    // delete tmp dir in a separate thread
    new Daemon(new Runnable() {
        public void run() {
          try {
            deleteDir(tmpDir);
          } catch(IOException ex) {
            LOG.error("Finalize upgrade for " + dataDirPath + " failed.", ex);
          }
          LOG.info("Finalize upgrade for " + dataDirPath + " is complete.");
        }
        public String toString() { return "Finalize " + dataDirPath; }
      }).start();
  }
 
  void finalizeUpgrade() throws IOException {
    for (Iterator<StorageDirectory> it = storageDirs.iterator(); it.hasNext();) {
      doFinalize(it.next());
    }
  }

  void finalizedUpgrade(int namespaceId) throws IOException {
    // To handle finalizing a snapshot taken at datanode level while
    // upgrading to federation, if datanode level snapshot previous exists,
    // then finalize it. Else finalize the corresponding BP.
    for (StorageDirectory sd : storageDirs) {
      File prevDir = sd.getPreviousDir();
      File curDir = sd.getCurrentDir();
      NameSpaceSliceStorage nsStorage = nsStorageMap.get(namespaceId);
      File nsRoot = nsStorage.getNsRoot(namespaceId, curDir);
      StorageDirectory nsSd = new StorageDirectory(nsRoot);
      if (prevDir.exists() && nsSd.getPreviousDir().exists()) {
        throw new IOException("Top level and NS level previous directories"
            + " cannot co-exist");
      }
      if (prevDir.exists()) {
        // data node level storage finalize
        doFinalize(sd);
      } else {
        // Name Space storage finalize using specific namespaceId
        nsStorage.doFinalize(curDir);
      }
    }
  }
 
  static void linkBlocks(File from, File to, int oldLV, HardLink hl, boolean createTo)
  throws IOException {
    if (!from.exists()) {
      LOG.warn(from + " does not exist");
      return;
    }
    if (!from.isDirectory()) {
      if (from.getName().startsWith(COPY_FILE_PREFIX) ||
          from.getName().equals(Storage.STORAGE_FILE_VERSION)) {
        FileInputStream in = new FileInputStream(from);
        FileOutputStream out = new FileOutputStream(to);
        try {
          IOUtils.copyBytes(in, out, 16*1024, true);
          hl.linkStats.countPhysicalFileCopies++;
        } finally {
          IOUtils.closeStream(in);
          IOUtils.closeStream(out);
        }
      } else {
       
        //check if we are upgrading from pre-generation stamp version.
        if (oldLV >= PRE_GENERATIONSTAMP_LAYOUT_VERSION) {
          // Link to the new file name.
          to = new File(convertMetatadataFileName(to.getAbsolutePath()));
        }
       
        HardLink.createHardLink(from, to);
        hl.linkStats.countSingleLinks++;
      }
      return;
    }
    // from is a directory
    hl.linkStats.countDirs++;
    if (createTo && !to.exists() && !to.mkdirs())
      throw new IOException("Cannot create directory " + to);
   
    //If upgrading from old stuff, need to munge the filenames.  That has to
    //be done one file at a time, so hardlink them one at a time (slow).
    if (oldLV >= PRE_GENERATIONSTAMP_LAYOUT_VERSION) {
      String[] blockNames = from.list(new java.io.FilenameFilter() {
        public boolean accept(File dir, String name) {
          return name.startsWith(BLOCK_SUBDIR_PREFIX)
          || name.startsWith(BLOCK_FILE_PREFIX)
          || name.startsWith(COPY_FILE_PREFIX);
        }
      });
      if (blockNames.length == 0) {
        hl.linkStats.countEmptyDirs++;
      } else {
        for(int i = 0; i < blockNames.length; i++)
          linkBlocks(new File(from, blockNames[i]),
            new File(to, blockNames[i]), oldLV, hl, true);
      }
    } else {
      //If upgrading from a relatively new version, we only need to create
      //links with the same filename.  This can be done in bulk (much faster).
      String[] blockNames = from.list(new java.io.FilenameFilter() {
        public boolean accept(File dir, String name) {
          return name.startsWith(BLOCK_FILE_PREFIX);
        }
      });
      if (blockNames.length > 0) {
        HardLink.createHardLinkMult(from, blockNames, to);
        hl.linkStats.countMultLinks++;
        hl.linkStats.countFilesMultLinks += blockNames.length;
      } else {
        hl.linkStats.countEmptyDirs++;
      }
     
      //now take care of the rest of the files and subdirectories
      String[] otherNames = from.list(new java.io.FilenameFilter() {
          public boolean accept(File dir, String name) {
            return name.startsWith(BLOCK_SUBDIR_PREFIX)
              || name.startsWith(COPY_FILE_PREFIX);
          }
        });
      for(int i = 0; i < otherNames.length; i++)
        linkBlocks(new File(from, otherNames[i]),
            new File(to, otherNames[i]), oldLV, hl, true);
    }
  }

  protected void corruptPreUpgradeStorage(File rootDir) throws IOException {
    File oldF = new File(rootDir, "storage");
    if (oldF.exists())
      return;
    // recreate old storage file to let pre-upgrade versions fail
    if (!oldF.createNewFile())
      throw new IOException("Cannot create file " + oldF);
    RandomAccessFile oldFile = new RandomAccessFile(oldF, "rws");
    // write new version into old storage file
    try {
      writeCorruptedData(oldFile);
    } finally {
      oldFile.close();
    }
  }

  private void verifyDistributedUpgradeProgress(
                  NamespaceInfo nsInfo
                ) throws IOException {
    UpgradeManagerDatanode um = datanode.getUpgradeManager(nsInfo
        .getNamespaceID());
    assert um != null : "DataNode.upgradeManager is null.";
    um.setUpgradeState(false, getLayoutVersion());
    um.initializeUpgrade(nsInfo);
  }

  private static final Pattern PRE_GENSTAMP_META_FILE_PATTERN =
    Pattern.compile("(.*blk_[-]*\\d+)\\.meta$");
  /**
   * This is invoked on target file names when upgrading from pre generation
   * stamp version (version -13) to correct the metatadata file name.
   * @param oldFileName
   * @return the new metadata file name with the default generation stamp.
   */
  private static String convertMetatadataFileName(String oldFileName) {
    Matcher matcher = PRE_GENSTAMP_META_FILE_PATTERN.matcher(oldFileName);
    if (matcher.matches()) {
      //return the current metadata file name
      return FSDataset.getMetaFileName(matcher.group(1),
                                       Block.GRANDFATHER_GENERATION_STAMP);
    }
    return oldFileName;
  }
 
  /**
   * Add nsStorage into nsStorageMap
   */ 
  private void addNameSpaceStorage(int nsID, NameSpaceSliceStorage nsStorage)
      throws IOException {
    if (!this.nsStorageMap.containsKey(nsID)) {
      this.nsStorageMap.put(nsID, nsStorage);
    }  
  }

  synchronized void removeNamespaceStorage(int nsId) {                                     
    nsStorageMap.remove(nsId);
  }
 

  /**
   * Get the data directory name that stores the namespace's blocks
   * @param namespaceId namespace id
   * @return the name of the last component of
   *         the given namespace's data directory
   */
  String getNameSpaceDataDir(int namespaceId) {
    return NameSpaceSliceStorage.getNamespaceDataDirName(namespaceId);
  }
}
TOP

Related Classes of org.apache.hadoop.hdfs.server.datanode.DataStorage

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.