Source Code of org.apache.hadoop.hdfs.server.namenode.BackupStorage

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode;

import java.io.DataInputStream;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.util.Collection;
import java.util.Iterator;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hdfs.server.common.HdfsConstants;
import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
import org.apache.hadoop.hdfs.server.namenode.FSImage;
import org.apache.hadoop.hdfs.server.namenode.EditLogFileInputStream;
import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
import org.apache.hadoop.io.LongWritable;

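/**
 * Storage for the backup node. Extends {@link FSImage} with journal
 * spooling, which lets the backup apply the active name-node's journal
 * to its own namespace state while a checkpoint is in progress.
 */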
@InterfaceAudience.Private
public class BackupStorage extends FSImage {
  // Names of the journal spool directory and the spool file
  private static final String STORAGE_JSPOOL_DIR = "jspool";
  private static final String STORAGE_JSPOOL_FILE =
                                              NameNodeFile.EDITS_NEW.getName();

  /** Backup input stream for loading edits into memory */
  private EditLogBackupInputStream backupInputStream;
  /** Current journal spooling state */
  volatile JSpoolState jsState;

  static enum JSpoolState {
    OFF,
    INPROGRESS,
    WAIT;
  }
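
  // State transitions, as implemented below:
  //   OFF        -> INPROGRESS  in startJournalSpool()
  //   INPROGRESS -> WAIT        in convergeJournalSpool(), on reaching
  //                             the end of the spool file
  //   WAIT       -> OFF         in convergeJournalSpool(), after the
  //                             spooled edits are merged (notifyAll())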

  /**
   * Default constructor. Journal spooling is initially off.
   */
  BackupStorage() {
    super();
    jsState = JSpoolState.OFF;
  }

  @Override
  public boolean isConversionNeeded(StorageDirectory sd) {
    return false;
  }

  /**
   * Analyze backup storage directories for consistency.<br>
   * Recover from incomplete checkpoints if required.<br>
   * Read VERSION and fstime files if they exist.<br>
   * Do not load image or edits.
   *
   * @param imageDirs list of image directories as URIs.
   * @param editsDirs list of edits directories as URIs.
   * @throws IOException if the node should shut down.
   */
  void recoverCreateRead(Collection<URI> imageDirs,
                         Collection<URI> editsDirs) throws IOException {
    setStorageDirectories(imageDirs, editsDirs);
    this.checkpointTime = 0L;
    for(Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) {
      StorageDirectory sd = it.next();
      StorageState curState;
      try {
        curState = sd.analyzeStorage(HdfsConstants.StartupOption.REGULAR);
        // sd is locked but not opened
        switch(curState) {
        case NON_EXISTENT:
          // fail if any of the configured storage dirs are inaccessible
          throw new InconsistentFSStateException(sd.getRoot(),
                "checkpoint directory does not exist or is not accessible.");
        case NOT_FORMATTED:
          // for backup node all directories may be unformatted initially
          LOG.info("Storage directory " + sd.getRoot() + " is not formatted.");
          LOG.info("Formatting ...");
          sd.clearDirectory(); // create empty current
          break;
        case NORMAL:
          break;
        default:  // recovery is possible
          sd.doRecover(curState);
        }
        if(curState != StorageState.NOT_FORMATTED) {
          sd.read(); // read and verify consistency with other directories
        }
      } catch(IOException ioe) {
        sd.unlock();
        throw ioe;
      }
    }
  }

  /**
   * Reset storage directories.
   * <p>
   * Unlock the storage.
   * Rename <code>current</code> to <code>lastcheckpoint.tmp</code>
   * and recreate empty <code>current</code>.
   * @throws IOException
   */
  synchronized void reset() throws IOException {
    // reset NameSpace tree
    FSDirectory fsDir = getFSNamesystem().dir;
    fsDir.reset();

    // unlock, close and rename storage directories
    unlockAll();
    // recover from unsuccessful checkpoint if necessary
    recoverCreateRead(getImageDirectories(), getEditsDirectories());
    // rename and recreate
    for(StorageDirectory sd : storageDirs) {
      // rename current to lastcheckpoint.tmp
      moveCurrent(sd);
    }
  }
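
  // After reset() each storage directory holds a freshly created empty
  // "current", with the previous state preserved in lastcheckpoint.tmp;
  // convergeJournalSpool() later promotes lastcheckpoint.tmp to
  // previous.checkpoint.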

  /**
   * Load checkpoint from local files only if the memory state is empty.<br>
   * Set new checkpoint time received from the name-node.<br>
   * Move <code>lastcheckpoint.tmp</code> to <code>previous.checkpoint</code>.
   * @throws IOException
   */
  void loadCheckpoint(CheckpointSignature sig) throws IOException {
    // load current image and journal if it is not in memory already
    if(!editLog.isOpen())
      editLog.open();

    FSDirectory fsDir = getFSNamesystem().dir;
    if(fsDir.isEmpty()) {
      Iterator<StorageDirectory> itImage = dirIterator(NameNodeDirType.IMAGE);
      Iterator<StorageDirectory> itEdits = dirIterator(NameNodeDirType.EDITS);
      if(!itImage.hasNext() || !itEdits.hasNext())
        throw new IOException("Could not locate checkpoint directories");
      StorageDirectory sdName = itImage.next();
      StorageDirectory sdEdits = itEdits.next();
      synchronized(getFSDirectoryRootLock()) { // load image under rootDir lock
        loadFSImage(FSImage.getImageFile(sdName, NameNodeFile.IMAGE));
      }
      loadFSEdits(sdEdits);
    }

    // set storage fields
    setStorageInfo(sig);
    checkpointTime = sig.checkpointTime;
  }

  /**
   * Save meta-data into fsimage files and create empty edits.
   */
  void saveCheckpoint() throws IOException {
    saveNamespace(false);
  }

  private Object getFSDirectoryRootLock() {
    return getFSNamesystem().dir.rootDir;
  }

  static File getJSpoolDir(StorageDirectory sd) {
    return new File(sd.getRoot(), STORAGE_JSPOOL_DIR);
  }

  static File getJSpoolFile(StorageDirectory sd) {
    return new File(getJSpoolDir(sd), STORAGE_JSPOOL_FILE);
  }
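
  // With the constants above, the spool file of a storage directory resolves
  // to <root>/jspool/edits.new; it is kept outside "current" so that the
  // checkpoint renames of "current" leave the spool untouched.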

  /**
   * Journal writer journals new meta-data state.
   * <ol>
   * <li> If Journal Spool state is OFF then journal records (edits)
   * are applied directly to meta-data state in memory and are written
   * to the edits file(s).</li>
   * <li> If Journal Spool state is INPROGRESS then records are only
   * written to the edits.new file; this is called spooling.</li>
   * <li> Journal Spool state WAIT blocks journaling until the
   * Journal Spool reader finalizes merging of the spooled data and
   * switches to applying journal to memory.</li>
   * </ol>
   * @param length length of data.
   * @param data serialized journal records.
   * @throws IOException
   * @see #convergeJournalSpool()
   */
  synchronized void journal(int length, byte[] data) throws IOException {
    assert backupInputStream.length() == 0 : "backup input stream is not empty";
    try {
      switch(jsState) {
        case WAIT:
        case OFF:
          // wait until spooling is off
          waitSpoolEnd();
          // update NameSpace in memory
          backupInputStream.setBytes(data);
          editLog.loadEditRecords(getLayoutVersion(),
                    backupInputStream.getDataInputStream(), true);
          getFSNamesystem().dir.updateCountForINodeWithQuota(); // inefficient!
          break;
        case INPROGRESS:
          break;
      }
      // write to files
      editLog.logEdit(length, data);
      editLog.logSync();
    } finally {
      backupInputStream.clear();
    }
  }
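
  // Note: backupInputStream is a single reusable buffer. journal() and
  // setCheckpointTime() both assert that it is empty on entry and clear it
  // in a finally block, so consecutive calls cannot see stale bytes.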

  private synchronized void waitSpoolEnd() {
    while(jsState == JSpoolState.WAIT) {
      try {
        wait();
      } catch (InterruptedException e) {
        // Ignore the interrupt and re-check the spool state.
      }
    }
    // now spooling should be off, verifying just in case
    assert jsState == JSpoolState.OFF : "Unexpected JSpool state: " + jsState;
  }
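
  // waitSpoolEnd() pairs with the synchronized block at the end of
  // convergeJournalSpool(), which sets jsState to OFF and calls notifyAll()
  // to wake the journal writer blocked here.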

  /**
   * Start journal spool.
   * Switch to writing into edits.new instead of edits.
   *
   * The edits.new file used for spooling is kept in the separate directory
   * "jspool" rather than in "current", because the two must be independent:
   * a checkpoint can happen while spooling, during which "current" is moved
   * first to lastcheckpoint.tmp and then to previous.checkpoint, while
   * jspool/edits.new remains in place.
   */
  synchronized void startJournalSpool(NamenodeRegistration nnReg)
  throws IOException {
    switch(jsState) {
      case OFF:
        break;
      case INPROGRESS:
        return;
      case WAIT:
        waitSpoolEnd();
    }

    // create journal spool directories
    for(Iterator<StorageDirectory> it =
                          dirIterator(NameNodeDirType.EDITS); it.hasNext();) {
      StorageDirectory sd = it.next();
      File jsDir = getJSpoolDir(sd);
      if (!jsDir.exists() && !jsDir.mkdirs()) {
        throw new IOException("Mkdirs failed to create "
                              + jsDir.getCanonicalPath());
      }
      // create edit file if missing
      File eFile = getEditFile(sd);
      if(!eFile.exists()) {
        editLog.createEditLogFile(eFile);
      }
    }

    if(!editLog.isOpen())
      editLog.open();

    // create streams pointing to the journal spool files
    // subsequent journal records will go directly to the spool
    editLog.divertFileStreams(STORAGE_JSPOOL_DIR + "/" + STORAGE_JSPOOL_FILE);
    setCheckpointState(CheckpointStates.ROLLED_EDITS);

    // set up spooling
    if(backupInputStream == null)
      backupInputStream = new EditLogBackupInputStream(nnReg.getAddress());
    jsState = JSpoolState.INPROGRESS;
  }

  synchronized void setCheckpointTime(int length, byte[] data)
  throws IOException {
    assert backupInputStream.length() == 0 : "backup input stream is not empty";
    try {
      // unpack new checkpoint time
      backupInputStream.setBytes(data);
      DataInputStream in = backupInputStream.getDataInputStream();
      byte op = in.readByte();
      assert op == NamenodeProtocol.JA_CHECKPOINT_TIME;
      LongWritable lw = new LongWritable();
      lw.readFields(in);
      setCheckpointTime(lw.get());
    } finally {
      backupInputStream.clear();
    }
  }
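
  // For illustration only, a hedged sketch of how a sender could produce the
  // `data` payload decoded above; the variable checkpointTime is hypothetical.
  //
  //   ByteArrayOutputStream buf = new ByteArrayOutputStream();
  //   DataOutputStream out = new DataOutputStream(buf);
  //   out.writeByte(NamenodeProtocol.JA_CHECKPOINT_TIME); // 1-byte opcode
  //   new LongWritable(checkpointTime).write(out);        // serialized long
  //   byte[] data = buf.toByteArray();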

  /**
   * Merge Journal Spool to memory.<p>
   * Journal Spool reader reads journal records from edits.new.
   * When it reaches the end of the file it sets {@link JSpoolState} to WAIT.
   * This blocks journaling (see {@link #journal(int, byte[])}).
   * The reader
   * <ul>
   * <li> reads remaining journal records if any,</li>
   * <li> renames edits.new to edits,</li>
   * <li> sets {@link JSpoolState} to OFF,</li>
   * <li> and notifies the journaling thread.</li>
   * </ul>
   * Journaling resumes with applying new journal records to the memory state,
   * and writing them into edits file(s).
   */
  void convergeJournalSpool() throws IOException {
    Iterator<StorageDirectory> itEdits = dirIterator(NameNodeDirType.EDITS);
    if(!itEdits.hasNext())
      throw new IOException("Could not locate checkpoint directories");
    StorageDirectory sdEdits = itEdits.next();
    int numEdits = 0;
    File jSpoolFile = getJSpoolFile(sdEdits);
    long startTime = FSNamesystem.now();
    if(jSpoolFile.exists()) {
      // load edits.new
      EditLogFileInputStream edits = new EditLogFileInputStream(jSpoolFile);
      DataInputStream in = edits.getDataInputStream();
      numEdits += editLog.loadFSEdits(in, false);
 
      // reached the end of the spool for the first time
      jsState = JSpoolState.WAIT;
      numEdits += editLog.loadEditRecords(getLayoutVersion(), in, true);
      getFSNamesystem().dir.updateCountForINodeWithQuota();
      edits.close();
    }

    FSImage.LOG.info("Edits file " + jSpoolFile.getCanonicalPath()
        + " of size " + jSpoolFile.length() + " edits # " + numEdits
        + " loaded in " + (FSNamesystem.now()-startTime)/1000 + " seconds.");

    // rename spool edits.new to edits making it in sync with the active node
    // subsequent journal records will go directly to edits
    editLog.revertFileStreams(STORAGE_JSPOOL_DIR + "/" + STORAGE_JSPOOL_FILE);

    // write version file
    resetVersion(false);

    // wake up journal writer
    synchronized(this) {
      jsState = JSpoolState.OFF;
      notifyAll();
    }

    // Rename lastcheckpoint.tmp to previous.checkpoint
    for(StorageDirectory sd : storageDirs) {
      moveLastCheckpoint(sd);
    }
  }
}
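
A hedged usage sketch: one way a backup node could drive this class through a
single checkpoint cycle. The methods and signatures are the ones defined
above; the variables imageDirs, editsDirs, nnReg, and sig stand in for the
surrounding BackupNode/Checkpointer wiring, which is assumed rather than shown.

    BackupStorage bs = new BackupStorage();
    // Analyze, lock and (if needed) format the storage directories.
    bs.recoverCreateRead(imageDirs, editsDirs);
    // Divert incoming journal records to jspool/edits.new.
    bs.startJournalSpool(nnReg);
    // ... journal(length, data) calls keep arriving from the active node ...
    // Load the checkpoint into memory if it is not already there.
    bs.loadCheckpoint(sig);
    // Write a new fsimage and create empty edits.
    bs.saveCheckpoint();
    // Merge the spooled records and resume direct journaling.
    bs.convergeJournalSpool();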