/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode;

import java.io.BufferedInputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.permission.PermissionStatus;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.namenode.FSImage.NameNodeDirType;
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeInstrumentation;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.UTF8;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableFactories;
import org.apache.hadoop.io.WritableFactory;
import org.apache.hadoop.security.token.delegation.DelegationKey;

/**
* FSEditLog maintains a log of the namespace modifications.
*
*/
public class FSEditLog {
  public static final Log LOG = LogFactory.getLog(FSEditLog.class);

  static final byte OP_INVALID = -1;
  private static final byte OP_ADD = 0;
  private static final byte OP_RENAME = 1;  // rename
  private static final byte OP_DELETE = 2;  // delete
  private static final byte OP_MKDIR = 3;   // create directory
  private static final byte OP_SET_REPLICATION = 4; // set replication
  //the following two are used only for backward compatibility:
  @Deprecated private static final byte OP_DATANODE_ADD = 5;
  @Deprecated private static final byte OP_DATANODE_REMOVE = 6;
  private static final byte OP_SET_PERMISSIONS = 7;
  private static final byte OP_SET_OWNER = 8;
  private static final byte OP_CLOSE = 9;    // close after write
  private static final byte OP_SET_GENSTAMP = 10;    // store genstamp
  /* The following two are not used any more. Should be removed once
   * LAST_UPGRADABLE_LAYOUT_VERSION is -17 or newer. */
  private static final byte OP_SET_NS_QUOTA = 11; // set namespace quota
  private static final byte OP_CLEAR_NS_QUOTA = 12; // clear namespace quota
  private static final byte OP_TIMES = 13; // sets mod & access time on a file
  private static final byte OP_SET_QUOTA = 14; // sets name and disk quotas.
  private static final byte OP_CONCAT_DELETE = 16; // concat files.
  private static final byte OP_GET_DELEGATION_TOKEN = 18; //new delegation token
  private static final byte OP_RENEW_DELEGATION_TOKEN = 19; //renew delegation token
  private static final byte OP_CANCEL_DELEGATION_TOKEN = 20; //cancel delegation token
  private static final byte OP_UPDATE_MASTER_KEY = 21; //update master key

  private static int sizeFlushBuffer = 512*1024;
  /** Preallocation length in bytes for writing edit log. */
  static final int MIN_PREALLOCATION_LENGTH = 1024 * 1024;
  /** The limit of the length in bytes for each edit log transaction. */
  private static int TRANSACTION_LENGTH_LIMIT = Integer.MAX_VALUE;
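
  // Editorial note (not in the original source): TRANSACTION_LENGTH_LIMIT is
  // the mark/reset window passed to in.mark() by loadFSEdits when edit log
  // toleration is enabled, so a transaction that fails to parse can be rewound
  // and its bytes re-examined as possible end-of-log padding.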

  private ArrayList<EditLogOutputStream> editStreams = null;
  private FSImage fsimage = null;

  // a monotonically increasing counter that represents transactionIds.
  private long txid = 0;

  // stores the last synced transactionId.
  private long synctxid = 0;

  // the time of printing the statistics to the log file.
  private long lastPrintTime;

  // is a sync currently running?
  private boolean isSyncRunning;

  // these are statistics counters.
  private long numTransactions;        // number of transactions
  private long numTransactionsBatchedInSync;
  private long totalTimeTransactions;  // total time for all transactions
  private NameNodeInstrumentation metrics;

  private static class TransactionId {
    public long txid;

    TransactionId(long value) {
      this.txid = value;
    }
  }

  // stores the most current transactionId of this thread.
  private static final ThreadLocal<TransactionId> myTransactionId = new ThreadLocal<TransactionId>() {
    protected synchronized TransactionId initialValue() {
      return new TransactionId(Long.MAX_VALUE);
    }
  };
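
  // Editorial sketch (not in the original source): each handler thread records
  // the txid of its own last logEdit() call here, so logSync() can skip the
  // physical sync when another thread's sync has already covered it:
  //
  //   editLog.logEdit(op, w);  // stores this thread's txid in myTransactionId
  //   editLog.logSync();       // returns early if mytxid <= synctxid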

  /**
   * An implementation of the abstract class {@link EditLogOutputStream},
   * which stores edits in a local file.
   */
  static class EditLogFileOutputStream extends EditLogOutputStream {
    /** Preallocation buffer, padded with OP_INVALID */
    private static final ByteBuffer PREALLOCATION_BUFFER
        = ByteBuffer.allocateDirect(MIN_PREALLOCATION_LENGTH);
    static {
      PREALLOCATION_BUFFER.position(0).limit(MIN_PREALLOCATION_LENGTH);
      for(int i = 0; i < PREALLOCATION_BUFFER.capacity(); i++) {
        PREALLOCATION_BUFFER.put(OP_INVALID);
      }
    }
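
    // Editorial note (added): padding with OP_INVALID rather than arbitrary
    // bytes is what lets loadFSEdits treat the first OP_INVALID opcode as
    // end-of-log, and lets checkEndOfLog verify that everything after the
    // last parsed transaction is uniform padding (0x00 or OP_INVALID).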

    private File file;
    private FileOutputStream fp;    // file stream for storing edit logs
    private FileChannel fc;         // channel of the file stream for sync
    private DataOutputBuffer bufCurrent;  // current buffer for writing
    private DataOutputBuffer bufReady;    // buffer ready for flushing

    EditLogFileOutputStream(File name) throws IOException {
      super();
      file = name;
      bufCurrent = new DataOutputBuffer(sizeFlushBuffer);
      bufReady = new DataOutputBuffer(sizeFlushBuffer);
      RandomAccessFile rp = new RandomAccessFile(name, "rw");
      fp = new FileOutputStream(rp.getFD()); // open for append
      fc = rp.getChannel();
      fc.position(fc.size());
    }

    @Override
    String getName() {
      return file.getPath();
    }

    /** {@inheritDoc} */
    @Override
    public void write(int b) throws IOException {
      bufCurrent.write(b);
    }

    /** {@inheritDoc} */
    @Override
    void write(byte op, Writable ... writables) throws IOException {
      write(op);
      for(Writable w : writables) {
        w.write(bufCurrent);
      }
    }

    /**
     * Create empty edits logs file.
     */
    @Override
    void create() throws IOException {
      fc.truncate(0);
      fc.position(0);
      bufCurrent.writeInt(FSConstants.LAYOUT_VERSION);
      setReadyToFlush();
      flush();
    }

    @Override
    public void close() throws IOException {
      LOG.info("closing edit log: position=" + fc.position() + ", editlog=" + getName());

      // close should have been called after all pending transactions
      // have been flushed & synced.
      int bufSize = bufCurrent.size();
      if (bufSize != 0) {
        throw new IOException("FSEditStream has " + bufSize +
           " bytes still to be flushed and cannot be closed.");
      }
      bufCurrent.close();
      bufReady.close();

      // remove any preallocated padding bytes from the transaction log.
      fc.truncate(fc.position());
      fp.close();
     
      bufCurrent = bufReady = null;

      LOG.info("close success: truncate to " + file.length() + ", editlog=" + getName());
    }

    /**
     * All data that has been written to the stream so far will be flushed.
     * New data can be still written to the stream while flushing is performed.
     */
    @Override
    void setReadyToFlush() throws IOException {
      assert bufReady.size() == 0 : "previous data is not flushed yet";
      DataOutputBuffer tmp = bufReady;
      bufReady = bufCurrent;
      bufCurrent = tmp;
    }
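
    // Editorial sketch (added, illustrative): logSync() holds the FSEditLog
    // lock only for this cheap swap, then flushes outside the lock:
    //
    //   synchronized (log) { eStream.setReadyToFlush(); } // swap buffers
    //   eStream.flush(); // write bufReady + fsync while others fill bufCurrent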

    /**
     * Flush ready buffer to persistent store.
     * currentBuffer is not flushed as it accumulates new log records
     * while readyBuffer will be flushed and synced.
     */
    @Override
    protected void flushAndSync() throws IOException {
      preallocate();            // preallocate file if necessary
      bufReady.writeTo(fp);     // write data to file
      bufReady.reset();         // erase all data in the buffer
      fc.force(false);          // metadata updates not needed because of preallocation
    }

    /**
     * Return the size of the current edit log including buffered data.
     */
    @Override
    long length() throws IOException {
      // file size + size of both buffers
      return fc.size() + bufReady.size() + bufCurrent.size();
    }

    // allocate a big chunk of data
    private void preallocate() throws IOException {
      long size = fc.size();
      int bufSize = bufReady.getLength();
      long need = bufSize - (size - fc.position());
      if (need <= 0) {
        return;
      }
      long oldSize = size;
      long total = 0;
      long fillCapacity = PREALLOCATION_BUFFER.capacity();
      while (need > 0) {
        PREALLOCATION_BUFFER.position(0);
        do {
          size += fc.write(PREALLOCATION_BUFFER, size);
        } while (PREALLOCATION_BUFFER.remaining() > 0);
        need -= fillCapacity;
        total += fillCapacity;
      }
      if(FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug("Preallocated " + total + " bytes at the end of " +
            "the edit log (offset " + oldSize + ")");
      }
    }
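
    // Worked example (added): with fc.position() = 900K, fc.size() = 1M and a
    // 200K bufReady, need = 200K - (1M - 900K) = 100K > 0, so one 1MB chunk of
    // OP_INVALID bytes is appended (file grows to 2M) before the buffered
    // edits are written over the start of the padding.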
   
    /**
     * Returns the file associated with this stream
     */
    File getFile() {
      return file;
    }
  }

  static class EditLogFileInputStream extends EditLogInputStream {
    private File file;
    private FileInputStream fStream;

    EditLogFileInputStream(File name) throws IOException {
      file = name;
      fStream = new FileInputStream(name);
    }

    @Override
    String getName() {
      return file.getPath();
    }

    @Override
    public int available() throws IOException {
      return fStream.available();
    }

    @Override
    public int read() throws IOException {
      return fStream.read();
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException {
      return fStream.read(b, off, len);
    }

    @Override
    public void close() throws IOException {
      fStream.close();
    }

    @Override
    long length() throws IOException {
      // no write buffers on the input side; the stream length is the file size
      return file.length();
    }
  }

  FSEditLog(FSImage image) {
    fsimage = image;
    isSyncRunning = false;
    metrics = NameNode.getNameNodeMetrics();
    lastPrintTime = FSNamesystem.now();
  }
 
  private File getEditFile(StorageDirectory sd) {
    return fsimage.getEditFile(sd);
  }
 
  private File getEditNewFile(StorageDirectory sd) {
    return fsimage.getEditNewFile(sd);
  }
 
  private int getNumStorageDirs() {
   int numStorageDirs = 0;
   Iterator<StorageDirectory> it = fsimage.dirIterator(NameNodeDirType.EDITS);
   while (it.hasNext()) {
     numStorageDirs++;
     it.next();
   }
   return numStorageDirs;
  }
 
  synchronized int getNumEditStreams() {
    return editStreams == null ? 0 : editStreams.size();
  }

  boolean isOpen() {
    return getNumEditStreams() > 0;
  }

  /**
   * Create empty edit log files.
   * Initialize the output stream for logging.
   *
   * @throws IOException
   */
  public synchronized void open() throws IOException {
    numTransactions = totalTimeTransactions = numTransactionsBatchedInSync = 0;
    if (editStreams == null) {
      editStreams = new ArrayList<EditLogOutputStream>();
    }
    Iterator<StorageDirectory> it = fsimage.dirIterator(NameNodeDirType.EDITS);
    while (it.hasNext()) {
      StorageDirectory sd = it.next();
      File eFile = getEditFile(sd);
      try {
        EditLogOutputStream eStream = new EditLogFileOutputStream(eFile);
        editStreams.add(eStream);
      } catch (IOException ioe) {
        fsimage.updateRemovedDirs(sd, ioe);
        it.remove();
      }
    }
    exitIfNoStreams();
  }

  public synchronized void createEditLogFile(File name) throws IOException {
    EditLogOutputStream eStream = new EditLogFileOutputStream(name);
    eStream.create();
    eStream.close();
  }

  /**
   * Shutdown the file store.
   */
  public synchronized void close() throws IOException {
    while (isSyncRunning) {
      try {
        wait(1000);
      } catch (InterruptedException ie) {
      }
    }
    if (editStreams == null) {
      return;
    }
    printStatistics(true);
    numTransactions = totalTimeTransactions = numTransactionsBatchedInSync = 0;

    for (int idx = 0; idx < editStreams.size(); idx++) {
      EditLogOutputStream eStream = editStreams.get(idx);
      try {
        eStream.setReadyToFlush();
        eStream.flush();
        eStream.close();
      } catch (IOException ioe) {
        removeEditsAndStorageDir(idx);
        idx--;
      }
    }
    editStreams.clear();
  }

  void fatalExit(String msg) {
    LOG.fatal(msg, new Exception(msg));
    Runtime.getRuntime().exit(-1);
  }

  /**
   * Exit the NN process if the edit streams have not yet been
   * initialized, e.g. if we failed while opening.
   */
  private void exitIfStreamsNotSet() {
    if (editStreams == null) {
      fatalExit("Edit streams not yet initialized");
    }
  }

  /**
   * Exit the NN process if there are no edit streams to log to.
   */
  void exitIfNoStreams() {
    if (editStreams == null || editStreams.isEmpty()) {
      fatalExit("No edit streams are accessible");
    }
  }

  /**
   * @return the storage directory for the given edit stream.
   */
  private File getStorageDirForStream(int idx) {
    File editsFile =
      ((EditLogFileOutputStream)editStreams.get(idx)).getFile();
    // The name dir is the parent of 'current', which is the parent of 'edits'
    return editsFile.getParentFile().getParentFile();
  }

  /**
   * Remove the given edits stream and its containing storage dir.
   */
  synchronized void removeEditsAndStorageDir(int idx) {
    exitIfStreamsNotSet();

    assert idx < getNumStorageDirs();
    assert getNumStorageDirs() == editStreams.size();
   
    File dir = getStorageDirForStream(idx);
    editStreams.remove(idx);
    exitIfNoStreams();
    fsimage.removeStorageDir(dir);
  }

  /**
   * Remove all edits streams for the given storage directory.
   */
  synchronized void removeEditsForStorageDir(StorageDirectory sd) {
    exitIfStreamsNotSet();

    if (!sd.getStorageDirType().isOfType(NameNodeDirType.EDITS)) {
      return;
    }
    for (int idx = 0; idx < editStreams.size(); idx++) {
      File parentDir = getStorageDirForStream(idx);
      if (parentDir.getAbsolutePath().equals(
            sd.getRoot().getAbsolutePath())) {
        editStreams.remove(idx);
        idx--;
      }
    }
    exitIfNoStreams();
  }
 
  /**
   * Remove each of the given edits streams and their corresponding
   * storage directories.
   */
  private void removeEditsStreamsAndStorageDirs(
      ArrayList<EditLogOutputStream> errorStreams) {
    if (errorStreams == null) {
      return;
    }
    for (EditLogOutputStream errorStream : errorStreams) {
      int idx = editStreams.indexOf(errorStream);
      if (-1 == idx) {
        fatalExit("Unable to find edits stream with IO error");
      }
      removeEditsAndStorageDir(idx);
    }
    fsimage.incrementCheckpointTime();
  }

  /**
   * check if ANY edits.new log exists
   */
  boolean existsNew() throws IOException {
    Iterator<StorageDirectory> it = fsimage.dirIterator(NameNodeDirType.EDITS);
    while (it.hasNext()) {
      if (getEditNewFile(it.next()).exists()) {
        return true;
      }
    }
    return false;
  }

  /**
   * The end of an edit log should contain padding of either 0x00 or OP_INVALID.
   * If it contains other bytes, the edit log may be corrupted.
   * It is important to perform the check; otherwise, a stray OP_INVALID byte
   * could be misinterpreted as an end-of-log, and lead to silent data loss.
   */
  private static void checkEndOfLog(final EditLogInputStream edits,
      final DataInputStream in,
      final PositionTrackingInputStream pin,
      final int tolerationLength) throws IOException {
    if (tolerationLength < 0) {
      //the checking is disabled.
      return;
    }
    LOG.info("Start checking end of edit log (" + edits.getName() + ") ...");

    in.mark(0); //clear the mark
    final long readLength = pin.getPos();

    long firstPadPos = -1; //first padded byte position
    byte pad = 0;          //padded value; must be either 0 or OP_INVALID
   
    final byte[] bytes = new byte[4096];
    for(int n; (n = in.read(bytes)) != -1; ) {
      for(int i = 0; i < n; i++) {
        final byte b = bytes[i];

        if (firstPadPos != -1 && b != pad) {
          //the byte is different from the first padded byte, reset firstPos
          firstPadPos = -1;

          if (LOG.isDebugEnabled()) {
            LOG.debug(String.format("reset: bytes[%d]=0x%X, pad=0x%02X", i, b, pad));
          }
        }

        if (firstPadPos == -1) {
          if (b == 0 || b == OP_INVALID) {
            //found the first padded byte
            firstPadPos = pin.getPos() - n + i;
            pad = b;
           
            if (LOG.isDebugEnabled()) {
              LOG.debug(String.format("found: bytes[%d]=0x%02X=pad, firstPadPos=%d",
                  i, b, firstPadPos));
            }
          }
        }
      }
    }
   
    final long corruptionLength;
    final long padLength;
    if (firstPadPos == -1) { //padding not found
      corruptionLength = edits.length() - readLength;
      padLength = 0;
    } else {
      corruptionLength = firstPadPos - readLength;
      padLength = edits.length() - firstPadPos;
    }

    LOG.info("Checked the bytes after the end of edit log (" + edits.getName() + "):");
    LOG.info("  Padding position  = " + firstPadPos + " (-1 means padding not found)");
    LOG.info("  Edit log length   = " + edits.length());
    LOG.info("  Read length       = " + readLength);
    LOG.info("  Corruption length = " + corruptionLength);
    LOG.info("  Toleration length = " + tolerationLength
        + " (= " + DFSConfigKeys.DFS_NAMENODE_EDITS_TOLERATION_LENGTH_KEY + ")");
    LOG.info(String.format(
        "Summary: |---------- Read=%d ----------|-- Corrupt=%d --|-- Pad=%d --|",
        readLength, corruptionLength, padLength));

    if (pin.getPos() != edits.length()) {
      throw new IOException("Edit log length mismatched: edits.length() = "
          + edits.length() + " != input stream position = " + pin.getPos());
    }
    if (corruptionLength > 0) {
      final String err = "Edit log corruption detected: corruption length = " + corruptionLength;
      if (corruptionLength <= tolerationLength) {
        LOG.warn(err + " <= toleration length = " + tolerationLength
            + "; the corruption is tolerable.");
      } else {
        throw new IOException(err + " > toleration length = " + tolerationLength
            + "; the corruption is intolerable.");
      }
    }
    return;
  }
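
  // Worked example (added): for a 10,000-byte log where parsing stopped at
  // readLength = 9,000 and uniform padding starts at firstPadPos = 9,100:
  //   corruptionLength = 9,100 - 9,000 = 100
  //   padLength        = 10,000 - 9,100 = 900
  // The 100 corrupt bytes are tolerated iff tolerationLength >= 100.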
 
  /**
   * Load an edit log, and apply the changes to the in-memory structure
   * This is where we apply edits that we've been writing to disk all
   * along.
   */
  static int loadFSEdits(EditLogInputStream edits, int tolerationLength,
      MetaRecoveryContext recovery) throws IOException {
    FSNamesystem fsNamesys = FSNamesystem.getFSNamesystem();
    FSDirectory fsDir = fsNamesys.dir;
    int numEdits = 0;
    int logVersion = 0;
    String clientName = null;
    String clientMachine = null;
    String path = null;
    int numOpAdd = 0, numOpClose = 0, numOpDelete = 0,
        numOpRename = 0, numOpSetRepl = 0, numOpMkDir = 0,
        numOpSetPerm = 0, numOpSetOwner = 0, numOpSetGenStamp = 0,
        numOpTimes = 0, numOpGetDelegationToken = 0,
        numOpRenewDelegationToken = 0, numOpCancelDelegationToken = 0,
        numOpUpdateMasterKey = 0, numOpOther = 0,
        numOpConcatDelete = 0;
    long highestGenStamp = -1;
    long startTime = FSNamesystem.now();

    // Keep track of the file offsets of the last several opcodes.
    // This is handy when manually recovering corrupted edits files.
    PositionTrackingInputStream tracker =
      new PositionTrackingInputStream(new BufferedInputStream(edits));
    long recentOpcodeOffsets[] = new long[4];
    Arrays.fill(recentOpcodeOffsets, -1);

    final boolean isTolerationEnabled = tolerationLength >= 0;
    DataInputStream in = new DataInputStream(tracker);
    Byte opcode = null;
    try {
      // Read log file version. Could be missing.
      in.mark(4);
      // If the edits log is larger than 2GB, available() returns a negative
      // number, so probe for EOF by reading a byte instead of calling available()
      boolean available = true;
      try {
        logVersion = in.readByte();
      } catch (EOFException e) {
        available = false;
      }
      if (available) {
        in.reset();
        logVersion = in.readInt();
        if (logVersion < FSConstants.LAYOUT_VERSION) // future version
          throw new IOException(
                          "Unexpected version of the file system log file: "
                          + logVersion + ". Current version = "
                          + FSConstants.LAYOUT_VERSION + ".");
      }
      assert logVersion <= Storage.LAST_UPGRADABLE_LAYOUT_VERSION :
                            "Unsupported version " + logVersion;

      while (true) {
        if (isTolerationEnabled) {
          //mark position could be reset in case of exceptions
          in.mark(TRANSACTION_LENGTH_LIMIT);
        }

        long timestamp = 0;
        long mtime = 0;
        long atime = 0;
        long blockSize = 0;
        opcode = null;
        try {
          opcode = in.readByte();
          if (opcode == OP_INVALID) {
            LOG.info("Invalid opcode, reached end of edit log " +
                       "Number of transactions found: " + numEdits + ".  " +
                       "Bytes read: " + tracker.getPos());
            break; // no more transactions
          }
        } catch (EOFException e) {
          LOG.info("EOF of " + edits.getName() + ", reached end of edit log "
              + "Number of transactions found: " + numEdits + ".  "
              + "Bytes read: " + tracker.getPos());
          break; // no more transactions
        }
        recentOpcodeOffsets[numEdits % recentOpcodeOffsets.length] =
          tracker.getPos();
        numEdits++;
        switch (opcode) {
        case OP_ADD:
        case OP_CLOSE: {
          // versions > 0 support per file replication
          // get name and replication
          int length = in.readInt();
          if (-7 == logVersion && length != 3 ||
              -17 < logVersion && logVersion < -7 && length != 4 ||
              logVersion <= -17 && length != 5) {
              throw new IOException("Incorrect data format." +
                                    " logVersion is " + logVersion +
                                    " but writables.length is " +
                                    length + ". ");
          }
          path = FSImage.readString(in);
          short replication = adjustReplication(readShort(in));
          mtime = readLong(in);
          if (logVersion <= -17) {
            atime = readLong(in);
          }
          if (logVersion < -7) {
            blockSize = readLong(in);
          }
          // get blocks
          Block blocks[] = null;
          if (logVersion <= -14) {
            blocks = readBlocks(in);
          } else {
            BlockTwo oldblk = new BlockTwo();
            int num = in.readInt();
            blocks = new Block[num];
            for (int i = 0; i < num; i++) {
              oldblk.readFields(in);
              blocks[i] = new Block(oldblk.blkid, oldblk.len,
                                    Block.GRANDFATHER_GENERATION_STAMP);
            }
          }

          // Older versions of HDFS did not store the block size in the inode.
          // If the file has more than one block, use the size of the
          // first block as the blocksize. Otherwise use the default
          // block size.
          if (-8 <= logVersion && blockSize == 0) {
            if (blocks.length > 1) {
              blockSize = blocks[0].getNumBytes();
            } else {
              long first = ((blocks.length == 1)? blocks[0].getNumBytes(): 0);
              blockSize = Math.max(fsNamesys.getDefaultBlockSize(), first);
            }
          }
          
          PermissionStatus permissions = fsNamesys.getUpgradePermission();
          if (logVersion <= -11) {
            permissions = PermissionStatus.read(in);
          }

          // clientname, clientMachine and block locations of last block.
          if (opcode == OP_ADD && logVersion <= -12) {
            clientName = FSImage.readString(in);
            clientMachine = FSImage.readString(in);
            if (-13 <= logVersion) {
              readDatanodeDescriptorArray(in);
            }
          } else {
            clientName = "";
            clientMachine = "";
          }

          // The open lease transaction re-creates a file if necessary.
          // Delete the file if it already exists.
          if (LOG.isDebugEnabled()) {
            LOG.debug(opcode + ": " + path +
                                   " numblocks : " + blocks.length +
                                   " clientHolder " +  clientName +
                                   " clientMachine " + clientMachine);
          }

          fsDir.unprotectedDelete(path, mtime);

          // add to the file tree
          INodeFile node = (INodeFile)fsDir.unprotectedAddFile(
                                                    path, permissions,
                                                    blocks, replication,
                                                    mtime, atime, blockSize);
          if (opcode == OP_ADD) {
            numOpAdd++;
            //
            // Replace current node with a INodeUnderConstruction.
            // Recreate in-memory lease record.
            //
            INodeFileUnderConstruction cons = new INodeFileUnderConstruction(
                                      node.getLocalNameBytes(),
                                      node.getReplication(),
                                      node.getModificationTime(),
                                      node.getPreferredBlockSize(),
                                      node.getBlocks(),
                                      node.getPermissionStatus(),
                                      clientName,
                                      clientMachine,
                                      null);
            fsDir.replaceNode(path, node, cons);
            fsNamesys.leaseManager.addLease(cons.clientName, path);
          }
          break;
        }
        case OP_SET_REPLICATION: {
          numOpSetRepl++;
          path = FSImage.readString(in);
          short replication = adjustReplication(readShort(in));
          fsDir.unprotectedSetReplication(path, replication, null);
          break;
        }
        case OP_CONCAT_DELETE: {
          if (logVersion > -22) {
            throw new IOException("Unexpected opcode " + opcode
                + " for version " + logVersion);
          }
          numOpConcatDelete++;
          int length = in.readInt();
          if (length < 3) { // trg, srcs..., timestamp
            throw new IOException("Incorrect data format. "
                                  + "ConcatDelete operation.");
          }
          String trg = FSImage.readString(in);
          int srcSize = length - 1 - 1; //trg and timestamp
          String [] srcs = new String [srcSize];
          for(int i=0; i<srcSize;i++) {
            srcs[i]= FSImage.readString(in);
          }
          timestamp = readLong(in);
          fsDir.unprotectedConcat(trg, srcs, timestamp);
          break;
        }
        case OP_RENAME: {
          numOpRename++;
          int length = in.readInt();
          if (length != 3) {
            throw new IOException("Incorrect data format. "
                                  + "Rename operation.");
          }
          String s = FSImage.readString(in);
          String d = FSImage.readString(in);
          timestamp = readLong(in);
          HdfsFileStatus dinfo = fsDir.getFileInfo(d);
          fsDir.unprotectedRenameTo(s, d, timestamp);
          fsNamesys.changeLease(s, d, dinfo);
          break;
        }
        case OP_DELETE: {
          numOpDelete++;
          int length = in.readInt();
          if (length != 2) {
            throw new IOException("Incorrect data format. "
                                  + "delete operation.");
          }
          path = FSImage.readString(in);
          timestamp = readLong(in);
          fsDir.unprotectedDelete(path, timestamp);
          break;
        }
        case OP_MKDIR: {
          numOpMkDir++;
          PermissionStatus permissions = fsNamesys.getUpgradePermission();
          int length = in.readInt();
          if (-17 < logVersion && length != 2 ||
              logVersion <= -17 && length != 3) {
            throw new IOException("Incorrect data format. "
                                  + "Mkdir operation.");
          }
          path = FSImage.readString(in);
          timestamp = readLong(in);

          // The disk format stores atimes for directories as well.
          // However, currently this is not being updated/used because of
          // performance reasons.
          if (logVersion <= -17) {
            atime = readLong(in);
          }

          if (logVersion <= -11) {
            permissions = PermissionStatus.read(in);
          }
          fsDir.unprotectedMkdir(path, permissions, timestamp);
          break;
        }
        case OP_SET_GENSTAMP: {
          numOpSetGenStamp++;
          long lw = in.readLong();
          if ((highestGenStamp != -1) && (highestGenStamp + 1 != lw)) {
            throw new IOException("OP_SET_GENSTAMP tried to set a genstamp of " + lw +
              " but the previous highest genstamp was " + highestGenStamp);
          }
          highestGenStamp = lw;
          fsDir.namesystem.setGenerationStamp(lw);
          break;
        }
        case OP_DATANODE_ADD: {
          numOpOther++;
          FSImage.DatanodeImage nodeimage = new FSImage.DatanodeImage();
          nodeimage.readFields(in);
          //Datanodes are not persisted any more.
          break;
        }
        case OP_DATANODE_REMOVE: {
          numOpOther++;
          DatanodeID nodeID = new DatanodeID();
          nodeID.readFields(in);
          //Datanodes are not persisted any more.
          break;
        }
        case OP_SET_PERMISSIONS: {
          numOpSetPerm++;
          if (logVersion > -11)
            throw new IOException("Unexpected opcode " + opcode
                                  + " for version " + logVersion);
          fsDir.unprotectedSetPermission(
              FSImage.readString(in), FsPermission.read(in));
          break;
        }
        case OP_SET_OWNER: {
          numOpSetOwner++;
          if (logVersion > -11)
            throw new IOException("Unexpected opcode " + opcode
                                  + " for version " + logVersion);
          fsDir.unprotectedSetOwner(FSImage.readString(in),
              FSImage.readString_EmptyAsNull(in),
              FSImage.readString_EmptyAsNull(in));
          break;
        }
        case OP_SET_NS_QUOTA: {
          if (logVersion > -16) {
            throw new IOException("Unexpected opcode " + opcode
                + " for version " + logVersion);
          }
          fsDir.unprotectedSetQuota(FSImage.readString(in),
                                    readLongWritable(in),
                                    FSConstants.QUOTA_DONT_SET);
          break;
        }
        case OP_CLEAR_NS_QUOTA: {
          if (logVersion > -16) {
            throw new IOException("Unexpected opcode " + opcode
                + " for version " + logVersion);
          }
          fsDir.unprotectedSetQuota(FSImage.readString(in),
                                    FSConstants.QUOTA_RESET,
                                    FSConstants.QUOTA_DONT_SET);
          break;
        }

        case OP_SET_QUOTA:
          fsDir.unprotectedSetQuota(FSImage.readString(in),
                                    readLongWritable(in),
                                    readLongWritable(in));
                                     
          break;

        case OP_TIMES: {
          numOpTimes++;
          int length = in.readInt();
          if (length != 3) {
            throw new IOException("Incorrect data format. "
                                  + "times operation.");
          }
          path = FSImage.readString(in);
          mtime = readLong(in);
          atime = readLong(in);
          fsDir.unprotectedSetTimes(path, mtime, atime, true);
          break;
        }
        case OP_GET_DELEGATION_TOKEN: {
          if (logVersion > -19) {
            throw new IOException("Unexpected opcode " + opcode
                + " for version " + logVersion);
          }
          numOpGetDelegationToken++;
          DelegationTokenIdentifier delegationTokenId =
              new DelegationTokenIdentifier();
          delegationTokenId.readFields(in);
          long expiryTime = readLong(in);
          fsNamesys.getDelegationTokenSecretManager()
              .addPersistedDelegationToken(delegationTokenId, expiryTime);
          break;
        }
        case OP_RENEW_DELEGATION_TOKEN: {
          if (logVersion > -19) {
            throw new IOException("Unexpected opcode " + opcode
                + " for version " + logVersion);
          }
          numOpRenewDelegationToken++;
          DelegationTokenIdentifier delegationTokenId =
              new DelegationTokenIdentifier();
          delegationTokenId.readFields(in);
          long expiryTime = readLong(in);
          fsNamesys.getDelegationTokenSecretManager()
              .updatePersistedTokenRenewal(delegationTokenId, expiryTime);
          break;
        }
        case OP_CANCEL_DELEGATION_TOKEN: {
          if (logVersion > -19) {
            throw new IOException("Unexpected opcode " + opcode
                + " for version " + logVersion);
          }
          numOpCancelDelegationToken++;
          DelegationTokenIdentifier delegationTokenId =
              new DelegationTokenIdentifier();
          delegationTokenId.readFields(in);
          fsNamesys.getDelegationTokenSecretManager()
              .updatePersistedTokenCancellation(delegationTokenId);
          break;
        }
        case OP_UPDATE_MASTER_KEY: {
          if (logVersion > -19) {
            throw new IOException("Unexpected opcode " + opcode
                + " for version " + logVersion);
          }
          numOpUpdateMasterKey++;
          DelegationKey delegationKey = new DelegationKey();
          delegationKey.readFields(in);
          fsNamesys.getDelegationTokenSecretManager().updatePersistedMasterKey(
              delegationKey);
          break;
        }
        default: {
          throw new IOException("Never seen opcode " + opcode);
        }
        }
      }
    } catch (Throwable t) {
      String msg = "Failed to parse edit log (" + edits.getName()
          + ") at position " + tracker.getPos()
          + ", edit log length is " + edits.length()
          + ", opcode=" + opcode
          + ", isTolerationEnabled=" + isTolerationEnabled;

      // Catch Throwable because in the case of a truly corrupt edits log, any
      // sort of error might be thrown (NumberFormat, NullPointer, EOF, etc.)
      if (Storage.is203LayoutVersion(logVersion) &&
          logVersion != FSConstants.LAYOUT_VERSION) {
        // Failed to load 0.20.203 version edits during upgrade. This version has
        // conflicting opcodes with the later releases. The editlog must be
        // emptied by restarting the namenode, before proceeding with the upgrade.
        msg += ": During upgrade, failed to load the editlog version " +
          logVersion + " from release 0.20.203. Please go back to the old " +
          " release and restart the namenode. This empties the editlog " +
          " and saves the namespace. Resume the upgrade after this step.";
        throw new IOException(msg, t);
      }
      if (recentOpcodeOffsets[0] != -1) {
        Arrays.sort(recentOpcodeOffsets);
        StringBuilder sb = new StringBuilder(", Recent opcode offsets=[")
            .append(recentOpcodeOffsets[0]);
        for (int i = 1; i < recentOpcodeOffsets.length; i++) {
          if (recentOpcodeOffsets[i] != -1) {
            sb.append(' ').append(recentOpcodeOffsets[i]);
          }
        }
        msg += sb.append("]");
      }

      LOG.warn(msg, t);
      if (isTolerationEnabled) {
        in.reset(); //reset to the beginning position of this transaction
      } else {
        //edit log toleration feature is disabled
        MetaRecoveryContext.editLogLoaderPrompt(msg, recovery);
      }
    } finally {
      try {
        checkEndOfLog(edits, in, tracker, tolerationLength);
      } finally {
        in.close();
      }
    }
    LOG.info("Edits file " + edits.getName()
        + " of size " + edits.length() + " edits # " + numEdits
        + " loaded in " + (FSNamesystem.now()-startTime)/1000 + " seconds.");

    if (LOG.isDebugEnabled()) {
      LOG.debug("numOpAdd = " + numOpAdd + " numOpClose = " + numOpClose
          + " numOpDelete = " + numOpDelete + " numOpRename = " + numOpRename
          + " numOpSetRepl = " + numOpSetRepl + " numOpMkDir = " + numOpMkDir
          + " numOpSetPerm = " + numOpSetPerm
          + " numOpSetOwner = " + numOpSetOwner
          + " numOpSetGenStamp = " + numOpSetGenStamp
          + " numOpTimes = " + numOpTimes
          + " numOpGetDelegationToken = " + numOpGetDelegationToken
          + " numOpRenewDelegationToken = " + numOpRenewDelegationToken
          + " numOpCancelDelegationToken = " + numOpCancelDelegationToken
          + " numOpUpdateMasterKey = " + numOpUpdateMasterKey
          + " numOpConcatDelete  = " + numOpConcatDelete
          + " numOpOther = " + numOpOther);
    }

    if (logVersion != FSConstants.LAYOUT_VERSION) // written by an older version
      numEdits++; // return a nonzero count so the caller saves a new image asap
    return numEdits;
  }

  // a placeholder for reading a long
  private static final LongWritable longWritable = new LongWritable();

  /** Read a long from an input stream */
  private static long readLongWritable(DataInputStream in) throws IOException {
    synchronized (longWritable) {
      longWritable.readFields(in);
      return longWritable.get();
    }
  }
 
  static short adjustReplication(short replication) {
    FSNamesystem fsNamesys = FSNamesystem.getFSNamesystem();
    short minReplication = fsNamesys.getMinReplication();
    if (replication<minReplication) {
      replication = minReplication;
    }
    short maxReplication = fsNamesys.getMaxReplication();
    if (replication>maxReplication) {
      replication = maxReplication;
    }
    return replication;
  }

  /**
   * Write an operation to the edit log. Do not sync to persistent
   * store yet.
   */
  synchronized void logEdit(byte op, Writable ... writables) {
    if (getNumEditStreams() < 1) {
      throw new AssertionError("No edit streams to log to");
    }
    long start = FSNamesystem.now();
    for (int idx = 0; idx < editStreams.size(); idx++) {
      EditLogOutputStream eStream = editStreams.get(idx);
      try {
        eStream.write(op, writables);
      } catch (IOException ioe) {
        removeEditsAndStorageDir(idx);
        idx--;
      }
    }
    exitIfNoStreams();
    // get a new transactionId
    txid++;

    //
    // record the transactionId when new data was written to the edits log
    //
    TransactionId id = myTransactionId.get();
    id.txid = txid;

    // update statistics
    long end = FSNamesystem.now();
    numTransactions++;
    totalTimeTransactions += (end-start);
    if (metrics != null) // Metrics is non-null only when used inside name node
      metrics.addTransaction(end-start);
  }
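
  // Usage sketch (added, illustrative): callers log the edit while holding
  // the namesystem lock, then sync after releasing it, roughly:
  //
  //   synchronized (fsNamesys) { ...mutate namespace...; logMkDir(path, node); }
  //   logSync(); // durability point; may be batched with other threads' syncs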

  //
  // Sync all modifications done by this thread.
  //
  public void logSync() throws IOException {
    ArrayList<EditLogOutputStream> errorStreams = null;
    long syncStart = 0;

    // Fetch the transactionId of this thread.
    long mytxid = myTransactionId.get().txid;

    ArrayList<EditLogOutputStream> streams = new ArrayList<EditLogOutputStream>();
    boolean sync = false;
    try {
      synchronized (this) {
        printStatistics(false);

        // if somebody is already syncing, then wait
        while (mytxid > synctxid && isSyncRunning) {
          try {
            wait(1000);
          } catch (InterruptedException ie) {
          }
        }

        //
        // If this transaction was already flushed, then nothing to do
        //
        if (mytxid <= synctxid) {
          numTransactionsBatchedInSync++;
          if (metrics != null) // Metrics is non-null only when used inside name node
            metrics.incrTransactionsBatchedInSync();
          return;
        }

        // now, this thread will do the sync
        syncStart = txid;
        isSyncRunning = true;
        sync = true;

        // swap buffers
        exitIfNoStreams();
        for(EditLogOutputStream eStream : editStreams) {
          try {
            eStream.setReadyToFlush();
            streams.add(eStream);
          } catch (IOException ie) {
            LOG.error("Unable to get ready to flush.", ie);
            //
            // remember the streams that encountered an error.
            //
            if (errorStreams == null) {
              errorStreams = new ArrayList<EditLogOutputStream>(1);
            }
            errorStreams.add(eStream);
          }
        }
      }

      // do the sync
      long start = FSNamesystem.now();
      for (EditLogOutputStream eStream : streams) {
        try {
          eStream.flush();
        } catch (IOException ie) {
          LOG.error("Unable to sync edit log.", ie);
          //
          // remember the streams that encountered an error.
          //
          if (errorStreams == null) {
            errorStreams = new ArrayList<EditLogOutputStream>(1);
          }
          errorStreams.add(eStream);
        }
      }
      long elapsed = FSNamesystem.now() - start;
      removeEditsStreamsAndStorageDirs(errorStreams);
      exitIfNoStreams();

      if (metrics != null) // Metrics is non-null only when used inside name node
        metrics.addSync(elapsed);

    } finally {
      synchronized (this) {
        if(sync) {
          synctxid = syncStart;
          isSyncRunning = false;
        }
        this.notifyAll();
      }
    }
  }
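
  // Editorial note (added): when several threads call logSync() concurrently,
  // only one performs the physical flush; the others either wait while a sync
  // is running or return immediately once synctxid has passed their own txid,
  // which is what numTransactionsBatchedInSync counts.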

  //
  // print statistics every 1 minute.
  //
  private void printStatistics(boolean force) {
    long now = FSNamesystem.now();
    if (lastPrintTime + 60000 > now && !force) {
      return;
    }
    if (editStreams == null || editStreams.size()==0) {
      return;
    }
    lastPrintTime = now;
    StringBuilder buf = new StringBuilder();
    buf.append("Number of transactions: ");
    buf.append(numTransactions);
    buf.append(" Total time for transactions(ms): ");
    buf.append(totalTimeTransactions);
    buf.append(" Number of transactions batched in Syncs: ");
    buf.append(numTransactionsBatchedInSync);
    buf.append(" Number of syncs: ");
    buf.append(editStreams.get(0).getNumSync());
    buf.append(" SyncTimes(ms): ");

    int numEditStreams = editStreams.size();
    for (int idx = 0; idx < numEditStreams; idx++) {
      EditLogOutputStream eStream = editStreams.get(idx);
      buf.append(eStream.getTotalSyncTime());
      buf.append(" ");
    }
    LOG.info(buf);
  }

  /**
   * Add open lease record to edit log.
   * Records the block locations of the last block.
   */
  public void logOpenFile(String path, INodeFileUnderConstruction newNode)
                   throws IOException {

    UTF8 nameReplicationPair[] = new UTF8[] {
      new UTF8(path),
      FSEditLog.toLogReplication(newNode.getReplication()),
      FSEditLog.toLogLong(newNode.getModificationTime()),
      FSEditLog.toLogLong(newNode.getAccessTime()),
      FSEditLog.toLogLong(newNode.getPreferredBlockSize())};
    logEdit(OP_ADD,
            new ArrayWritable(UTF8.class, nameReplicationPair),
            new ArrayWritable(Block.class, newNode.getBlocks()),
            newNode.getPermissionStatus(),
            new UTF8(newNode.getClientName()),
            new UTF8(newNode.getClientMachine()));
  }

  /**
   * Add close lease record to edit log.
   */
  public void logCloseFile(String path, INodeFile newNode) {
    UTF8 nameReplicationPair[] = new UTF8[] {
      new UTF8(path),
      FSEditLog.toLogReplication(newNode.getReplication()),
      FSEditLog.toLogLong(newNode.getModificationTime()),
      FSEditLog.toLogLong(newNode.getAccessTime()),
      FSEditLog.toLogLong(newNode.getPreferredBlockSize())};
    logEdit(OP_CLOSE,
            new ArrayWritable(UTF8.class, nameReplicationPair),
            new ArrayWritable(Block.class, newNode.getBlocks()),
            newNode.getPermissionStatus());
  }
 
  /**
   * Add create directory record to edit log
   */
  public void logMkDir(String path, INode newNode) {
    UTF8 info[] = new UTF8[] {
      new UTF8(path),
      FSEditLog.toLogLong(newNode.getModificationTime()),
      FSEditLog.toLogLong(newNode.getAccessTime())
    };
    logEdit(OP_MKDIR, new ArrayWritable(UTF8.class, info),
        newNode.getPermissionStatus());
  }
 
  /**
   * Add rename record to edit log
   * TODO: use String parameters until just before writing to disk
   */
  void logRename(String src, String dst, long timestamp) {
    UTF8 info[] = new UTF8[] {
      new UTF8(src),
      new UTF8(dst),
      FSEditLog.toLogLong(timestamp)};
    logEdit(OP_RENAME, new ArrayWritable(UTF8.class, info));
  }
 
  /**
   * Add set replication record to edit log
   */
  void logSetReplication(String src, short replication) {
    logEdit(OP_SET_REPLICATION,
            new UTF8(src),
            FSEditLog.toLogReplication(replication));
  }
 
  /** Add set quota record to edit log
   *
   * @param src the string representation of the path to a directory
   * @param nsQuota the namespace quota (limit on the number of names)
   * @param dsQuota the diskspace quota (limit on the total size in bytes)
   */
  void logSetQuota(String src, long nsQuota, long dsQuota) {
    logEdit(OP_SET_QUOTA, new UTF8(src),
            new LongWritable(nsQuota), new LongWritable(dsQuota));
  }

  /**  Add set permissions record to edit log */
  void logSetPermissions(String src, FsPermission permissions) {
    logEdit(OP_SET_PERMISSIONS, new UTF8(src), permissions);
  }

  /**  Add set owner record to edit log */
  void logSetOwner(String src, String username, String groupname) {
    UTF8 u = new UTF8(username == null? "": username);
    UTF8 g = new UTF8(groupname == null? "": groupname);
    logEdit(OP_SET_OWNER, new UTF8(src), u, g);
  }

  /**
   * Add concat delete record, concat(trg, srcs...), to edit log
   */
  void logConcat(String trg, String [] srcs, long timestamp) {
    int size = 1 + srcs.length + 1; // trg, srcs, timestamp
    UTF8 info[] = new UTF8[size];
    int idx = 0;
    info[idx++] = new UTF8(trg);
    for(int i=0; i<srcs.length; i++) {
      info[idx++] = new UTF8(srcs[i]);
    }
    info[idx] = FSEditLog.toLogLong(timestamp);
    logEdit(OP_CONCAT_DELETE, new ArrayWritable(UTF8.class, info));
  }
 
  /**
   * Add delete file record to edit log
   */
  void logDelete(String src, long timestamp) {
    UTF8 info[] = new UTF8[] {
      new UTF8(src),
      FSEditLog.toLogLong(timestamp)};
    logEdit(OP_DELETE, new ArrayWritable(UTF8.class, info));
  }

  /**
   * Add generation stamp record to edit log
   */
  void logGenerationStamp(long genstamp) {
    logEdit(OP_SET_GENSTAMP, new LongWritable(genstamp));
  }

  /**
   * Add access time record to edit log
   */
  void logTimes(String src, long mtime, long atime) {
    UTF8 info[] = new UTF8[] {
      new UTF8(src),
      FSEditLog.toLogLong(mtime),
      FSEditLog.toLogLong(atime)};
    logEdit(OP_TIMES, new ArrayWritable(UTF8.class, info));
  }

  /**
   * Log a newly issued delegation token to the edit log.
   * @param id identifier of the delegation token
   * @param expiryTime expiry time of the token
   */
  void logGetDelegationToken(DelegationTokenIdentifier id,
      long expiryTime) {
    logEdit(OP_GET_DELEGATION_TOKEN, id, FSEditLog.toLogLong(expiryTime));
  }

  void logRenewDelegationToken(DelegationTokenIdentifier id,
      long expiryTime) {
    logEdit(OP_RENEW_DELEGATION_TOKEN, id, FSEditLog.toLogLong(expiryTime));
  }

  void logCancelDelegationToken(DelegationTokenIdentifier id) {
    logEdit(OP_CANCEL_DELEGATION_TOKEN, id);
  }

  void logUpdateMasterKey(DelegationKey key) {
    logEdit(OP_UPDATE_MASTER_KEY, key);
  }

  static private UTF8 toLogReplication(short replication) {
    return new UTF8(Short.toString(replication));
  }
 
  static private UTF8 toLogLong(long timestamp) {
    return new UTF8(Long.toString(timestamp));
  }

  /**
   * Return the size of the current EditLog
   */
  synchronized long getEditLogSize() throws IOException {
    assert(getNumStorageDirs() == editStreams.size());
    long size = 0;
    for (int idx = 0; idx < editStreams.size(); idx++) {
      long curSize = editStreams.get(idx).length();
      assert (size == 0 || size == curSize) : "All streams must be the same";
      size = curSize;
    }
    return size;
  }

  /**
   * Closes the current edit log and opens edits.new.
   */
  synchronized void rollEditLog() throws IOException {
    //
    // If edits.new already exists in some directory, verify it
    // exists in all directories.
    //
    if (existsNew()) {
      Iterator<StorageDirectory> it =
        fsimage.dirIterator(NameNodeDirType.EDITS);
      StringBuilder b = new StringBuilder();
      while (it.hasNext()) {
        File editsNew = getEditNewFile(it.next());
        b.append("\n  ").append(editsNew);
        if (!editsNew.exists()) {
          throw new IOException(
              "Inconsistent existence of edits.new " + editsNew);
        }
      }
      LOG.warn("Cannot roll edit log," +
          " edits.new files already exists in all healthy directories:" + b);
      return;
    }
    close(); // close existing edit log

    // After edit streams are closed, healthy edits files should be identical,
    // and same to fsimage files
    fsimage.restoreStorageDirs();
   
    //
    // Open edits.new
    //
    Iterator<StorageDirectory> it = fsimage.dirIterator(NameNodeDirType.EDITS);
    LinkedList<StorageDirectory> toRemove = new LinkedList<StorageDirectory>();
    while (it.hasNext()) {
      StorageDirectory sd = it.next();
      try {
        EditLogFileOutputStream eStream =
             new EditLogFileOutputStream(getEditNewFile(sd));
        eStream.create();
        editStreams.add(eStream);
      } catch (IOException ioe) {
        LOG.error("error retrying to reopen storage directory '" +
            sd.getRoot().getAbsolutePath() + "'", ioe);
        toRemove.add(sd);
        it.remove();
      }
    }

    // updateRemovedDirs will abort the NameNode if it removes the last
    // valid edit log directory.
    for (StorageDirectory sd : toRemove) {
      removeEditsForStorageDir(sd);
      fsimage.updateRemovedDirs(sd);
    }
    exitIfNoStreams();
  }
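
  // Lifecycle sketch (added): a checkpoint first rolls and later purges:
  //
  //   editLog.rollEditLog();  // freeze edits; new transactions go to edits.new
  //   // ... checkpoint merges fsimage + edits and saves the new image ...
  //   editLog.purgeEditLog(); // rename edits.new to edits and reopen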

  /**
   * Removes the old edit log and renames edits.new to edits.
   * Reopens the edits file.
   */
  synchronized void purgeEditLog() throws IOException {
    //
    // If edits.new does not exist, then return an error.
    //
    if (!existsNew()) {
      throw new IOException("Attempt to purge edit log " +
                            "but edits.new does not exist.");
    }
    close();

    //
    // Delete edits and rename edits.new to edits.
    //
    Iterator<StorageDirectory> it = fsimage.dirIterator(NameNodeDirType.EDITS);
    while (it.hasNext()) {
      StorageDirectory sd = it.next();
      if (!getEditNewFile(sd).renameTo(getEditFile(sd))) {
        //
        // renameTo() fails on Windows if the destination
        // file exists.
        //
        getEditFile(sd).delete();
        if (!getEditNewFile(sd).renameTo(getEditFile(sd))) {
          sd.unlock();
          removeEditsForStorageDir(sd);
          fsimage.updateRemovedDirs(sd);
          it.remove();
        }
      }
    }
    //
    // Reopen all the edits logs.
    //
    open();
  }

  /**
   * Return the name of the edit file
   */
  synchronized File getFsEditName() throws IOException {
    StorageDirectory sd = null;
    for (Iterator<StorageDirectory> it =
           fsimage.dirIterator(NameNodeDirType.EDITS); it.hasNext();)
      sd = it.next();
    return getEditFile(sd);
  }

  /**
   * Returns the timestamp of the edit log
   */
  synchronized long getFsEditTime() {
    Iterator<StorageDirectory> it = fsimage.dirIterator(NameNodeDirType.EDITS);
    if(it.hasNext())
      return getEditFile(it.next()).lastModified();
    return 0;
  }

  // sets the initial capacity of the flush buffer.
  static void setBufferCapacity(int size) {
    sizeFlushBuffer = size;
  }

  /**
   * A class to read in blocks stored in the old format. The only two
   * fields in the block were blockid and length.
   */
  static class BlockTwo implements Writable {
    long blkid;
    long len;

    static {                                      // register a ctor
      WritableFactories.setFactory
        (BlockTwo.class,
         new WritableFactory() {
           public Writable newInstance() { return new BlockTwo(); }
         });
    }


    BlockTwo() {
      blkid = 0;
      len = 0;
    }
    /////////////////////////////////////
    // Writable
    /////////////////////////////////////
    public void write(DataOutput out) throws IOException {
      out.writeLong(blkid);
      out.writeLong(len);
    }

    public void readFields(DataInput in) throws IOException {
      this.blkid = in.readLong();
      this.len = in.readLong();
    }
  }
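
  // Editorial note (added): BlockTwo is only read by loadFSEdits for
  // logVersion > -14, where blocks were serialized as (blkid, len) pairs with
  // no generation stamp; the loader substitutes GRANDFATHER_GENERATION_STAMP.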

  /** This method is defined for compatibility reason. */
  static private DatanodeDescriptor[] readDatanodeDescriptorArray(DataInput in
      ) throws IOException {
    DatanodeDescriptor[] locations = new DatanodeDescriptor[in.readInt()];
    for (int i = 0; i < locations.length; i++) {
      locations[i] = new DatanodeDescriptor();
      locations[i].readFieldsFromFSEditLog(in);
    }
    return locations;
  }

  static private short readShort(DataInputStream in) throws IOException {
    return Short.parseShort(FSImage.readString(in));
  }

  static private long readLong(DataInputStream in) throws IOException {
    return Long.parseLong(FSImage.readString(in));
  }

  static private Block[] readBlocks(DataInputStream in) throws IOException {
    int numBlocks = in.readInt();
    Block[] blocks = new Block[numBlocks];
    for (int i = 0; i < numBlocks; i++) {
      blocks[i] = new Block();
      blocks[i].readFields(in);
    }
    return blocks;
  }

  /**
   * Stream wrapper that keeps track of the current file position.
   */
  private static class PositionTrackingInputStream extends FilterInputStream {
    private long curPos = 0;
    private long markPos = -1;

    public PositionTrackingInputStream(InputStream is) {
      super(is);
    }

    public int read() throws IOException {
      int ret = super.read();
      if (ret != -1) curPos++;
      return ret;
    }

    public int read(byte[] data) throws IOException {
      int ret = super.read(data);
      if (ret > 0) curPos += ret;
      return ret;
    }

    public int read(byte[] data, int offset, int length) throws IOException {
      int ret = super.read(data, offset, length);
      if (ret > 0) curPos += ret;
      return ret;
    }

    public void mark(int limit) {
      super.mark(limit);
      markPos = curPos;
    }

    public void reset() throws IOException {
      if (markPos == -1) {
        throw new IOException("Not marked!");
      }
      super.reset();
      curPos = markPos;
      markPos = -1;
    }

    public long getPos() {
      return curPos;
    }
  }
}