// Package com.sleepycat.je.recovery
//
// Source Code of com.sleepycat.je.recovery.RecoveryManager

/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 2002-2005
*      Sleepycat Software.  All rights reserved.
*
* $Id: RecoveryManager.java,v 1.189.4.1 2005/10/22 05:34:19 mark Exp $
*/

package com.sleepycat.je.recovery;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

import com.sleepycat.je.CheckpointConfig;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.TransactionConfig;
import com.sleepycat.je.cleaner.UtilizationTracker;
import com.sleepycat.je.config.EnvironmentParams;
import com.sleepycat.je.dbi.DatabaseId;
import com.sleepycat.je.dbi.DatabaseImpl;
import com.sleepycat.je.dbi.DbConfigManager;
import com.sleepycat.je.dbi.DbTree;
import com.sleepycat.je.dbi.EnvironmentImpl;
import com.sleepycat.je.latch.Latch;
import com.sleepycat.je.log.CheckpointFileReader;
import com.sleepycat.je.log.FileManager;
import com.sleepycat.je.log.INFileReader;
import com.sleepycat.je.log.LNFileReader;
import com.sleepycat.je.log.LastFileReader;
import com.sleepycat.je.log.LogEntryType;
import com.sleepycat.je.tree.BIN;
import com.sleepycat.je.tree.ChildReference;
import com.sleepycat.je.tree.DIN;
import com.sleepycat.je.tree.IN;
import com.sleepycat.je.tree.Key;
import com.sleepycat.je.tree.LN;
import com.sleepycat.je.tree.Node;
import com.sleepycat.je.tree.SearchResult;
import com.sleepycat.je.tree.TrackingInfo;
import com.sleepycat.je.tree.Tree;
import com.sleepycat.je.tree.TreeLocation;
import com.sleepycat.je.tree.WithRootLatched;
import com.sleepycat.je.txn.LockType;
import com.sleepycat.je.txn.Txn;
import com.sleepycat.je.utilint.DbLsn;
import com.sleepycat.je.utilint.Tracer;

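/**
 * Performs recovery for an environment: finds the end of the log and the
 * last checkpoint, reads the mapping tree root, and then rebuilds the in
 * memory tree by replaying INs and by undoing and redoing LNs found in the
 * log.
 */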
public class RecoveryManager {
    /*
     * Trace message tags. None of these are referenced in the visible part
     * of this class; presumably they prefix the detailed trace messages
     * emitted by the replay/undo/redo helpers -- confirm against their uses.
     */
    private static final String TRACE_DUP_ROOT_REPLACE =
        "DupRootRecover:";
    private static final String TRACE_LN_REDO = "LNRedo:";
    private static final String TRACE_LN_UNDO = "LNUndo";
    private static final String TRACE_IN_REPLACE = "INRecover:";
    private static final String TRACE_ROOT_REPLACE = "RootRecover:";
    private static final String TRACE_IN_DEL_REPLAY = "INDelReplay:";
    private static final String TRACE_IN_DUPDEL_REPLAY = "INDupDelReplay:";
    private static final String TRACE_ROOT_DELETE = "RootDelete:";

    /*
     * replayOneIN clears the environment's in-memory IN list each time the
     * number of replayed entries reaches a multiple of this value.
     */
    private static final int CLEAR_INCREMENT = 50;

    private EnvironmentImpl env;     // environment being recovered
    private int readBufferSize;      // buffer size passed to the log file
                                     // readers (LOG_ITERATOR_READ_SIZE)
    private RecoveryInfo info;       // stat info; re-created by recover()
    private Set committedTxnIds;     // committed txns; consulted by undoLNs
                                     // to skip LNs of committed txns
    private Set abortedTxnIds;       // aborted txns
    private Map preparedTxns;        // txnid -> prepared Txn; set to null by
                                     // recover() once it has been reported
    private Set inListRebuildDbIds;  // dbs for which we have to rebuild the
                                     // in memory IN list.

    private Level detailedTraceLevel; // level value for detailed trace msgs
    private Map fileSummaryLsns;      // file number -> LSN of FileSummaryLN;
                                      // handed to each INFileReader
    private int inListClearCounter;   // governs intermediate IN list clearing;
                                      // reset to 0 at the start of buildTree

    /**
     * Make a recovery manager
     */
    public RecoveryManager(EnvironmentImpl env)
        throws DatabaseException {

        this.env = env;
        DbConfigManager cm = env.getConfigManager();
        readBufferSize =
            cm.getInt(EnvironmentParams.LOG_ITERATOR_READ_SIZE);
        committedTxnIds = new HashSet();
        abortedTxnIds = new HashSet();
  preparedTxns = new HashMap();
        inListRebuildDbIds = new HashSet();
        fileSummaryLsns = new HashMap();

        /*
         * Figure out the level to use for detailed trace messages, by choosing
         * the more verbose of the recovery manager's trace setting vs the
         * general trace setting.
         */
        detailedTraceLevel =
            Tracer.parseLevel(env,
                              EnvironmentParams.JE_LOGGING_LEVEL_RECOVERY);
    }
   
    /**
     * Look for an existing log and use it to create an in memory structure for
     * accessing existing databases. The file manager and logging system are
     * only available after recovery.
     * @return RecoveryInfo statistics about the recovery process.
     */
    public RecoveryInfo recover(boolean readOnly)
        throws DatabaseException {

        info = new RecoveryInfo();

        try {
            FileManager fileManager = env.getFileManager();
      DbConfigManager configManager = env.getConfigManager();
      boolean forceCheckpoint =
    configManager.getBoolean
    (EnvironmentParams.ENV_RECOVERY_FORCE_CHECKPOINT);
            if (fileManager.filesExist()) {

                /*
                 * Establish the location of the end of the log. After this, we
                 * can write to the log. No Tracer calls are allowed until
                 * after this point is established in the log.
                 */
                findEndOfLog(readOnly);
                Tracer.trace(Level.CONFIG, env,
                             "Recovery underway, found end of log");
       
                /*
                 * Establish the location of the root, the last checkpoint, and
                 * the first active LSN by finding the last checkpoint.
                 */
                findLastCheckpoint();
    env.getLogManager().setLastLsnAtRecovery
        (fileManager.getLastUsedLsn());
                Tracer.trace(Level.CONFIG, env,
                             "Recovery checkpoint search, " +
                             info);

                /* Read in the root. */
                env.readMapTreeFromLog(info.useRootLsn);

                /* Rebuild the in memory tree from the log. */
                buildTree();
            } else {

                /*
                 * Nothing more to be done. Enable publishing of debug log
                 * messages to the database log.
                 */
                env.enableDebugLoggingToDbLog();
                Tracer.trace(Level.CONFIG, env, "Recovery w/no files.");
                env.logMapTreeRoot();

    /*
     * Always force a checkpoint during creation.
     */
    forceCheckpoint = true;
            }

      if (preparedTxns.size() > 0) {
    Tracer.trace(Level.INFO, env,
           "There are " + preparedTxns.size() +
           " prepared but unfinished txns.");

    /*
     * We don't need this set any more since these are all
     * registered with the TxnManager now.
     */
    preparedTxns = null;
      }

            /*
             * At this point, we've recovered (or there were no log files at
             * all. Write a checkpoint into the log.
             *
             * NOTE: The discussion of deltas below may be obsolete now that
             * we use dirty bits to determine what to include in a delta.
             * However, we still want to disallow deltas to flush full versions
             * after a crash.
             *
             * Don't allow deltas, because the delta-determining scheme that
             * compares child entries to the last full LSN doesn't work in
             * recovery land. New child entries may have an earlier LSN than
             * the owning BIN's last full, because of the act of splicing in
             * LNs during recovery.
             *
             * For example, suppose that during LN redo, bin 10 was split into
             * bin 10 and bin 12. That splitting causes a full log.  Then later
             * on, the redo splices LN x, which is from before the last full of
             * bin 10, into bin 10. If we checkpoint allowing deltas after
             * recovery finishes, we won't pick up the LNx diff, because that
             * LN is an earlier LSN than the split-induced full log entry of
             * bin 10.
             */
            if (!readOnly &&
    (env.getLogManager().getLastLsnAtRecovery() !=
     info.checkpointEndLsn ||
     forceCheckpoint)) {
              CheckpointConfig config = new CheckpointConfig();
              config.setForce(true);
              config.setMinimizeRecoveryTime(true);
                env.invokeCheckpoint
                    (config,
                     false, // flushAll
                     "recovery");
            }

        } catch (IOException e) {
            Tracer.trace(env, "RecoveryManager", "recover",
                         "Couldn't recover", e);
            throw new RecoveryException(env, "Couldn't recover: " +
                                        e.getMessage(), e);
        } finally {
      Tracer.trace(Level.CONFIG, env, "Recovery finished: " + info);
        }

        return info;
    }

    /**
     * Find the end of the log, initialize the FileManager. While we're
     * perusing the log, return the last checkpoint LSN if we happen to see it.
     */
    private void findEndOfLog(boolean readOnly)
        throws IOException, DatabaseException {

        LastFileReader reader = new LastFileReader(env, readBufferSize);

        /*
         * Tell the reader to iterate through the log file until we hit the end
         * of the log or an invalid entry.
         * Remember the last seen CkptEnd, and the first CkptStart with no
         * following CkptEnd.
         */
        while (reader.readNextEntry()) {
            LogEntryType type = reader.getEntryType();
            if (LogEntryType.LOG_CKPT_END.equals(type)) {
                info.checkpointEndLsn = reader.getLastLsn();
                info.partialCheckpointStartLsn = DbLsn.NULL_LSN;
            } else if (LogEntryType.LOG_CKPT_START.equals(type)) {
                if (info.partialCheckpointStartLsn == DbLsn.NULL_LSN) {
                    info.partialCheckpointStartLsn = reader.getLastLsn();
                }
            }
        }

        assert (reader.getLastValidLsn() != reader.getEndOfLog()):
            "lastUsed=" + DbLsn.getNoFormatString(reader.getLastValidLsn()) +
            " end=" + DbLsn.getNoFormatString(reader.getEndOfLog());


        /* Now truncate if necessary. */
        if (!readOnly) {
            reader.setEndOfFile();
        }

        /* Tell the fileManager where the end of the log is. */
        info.lastUsedLsn = reader.getLastValidLsn();
        info.nextAvailableLsn = reader.getEndOfLog();
        info.nRepeatIteratorReads += reader.getNRepeatIteratorReads();
        env.getFileManager().setLastPosition(info.nextAvailableLsn,
                                             info.lastUsedLsn,
                                             reader.getPrevOffset());

        /*
         * Now the logging system is initialized and can do more
         * logging. Enable publishing of debug log messages to the database
         * log.
         */
        env.enableDebugLoggingToDbLog();
    }

    /**
     * Find the last checkpoint and establish the firstActiveLsn point,
     * checkpoint start, and checkpoint end.
     */
    private void findLastCheckpoint()
        throws IOException, DatabaseException {

        /*
         * The checkpointLsn might have been already found when establishing
         * the end of the log.  If it was found, then partialCheckpointStartLsn
         * was also found.  If it was not found, search backwards for it now
         * and also set partialCheckpointStartLsn.
         */
        if (info.checkpointEndLsn == DbLsn.NULL_LSN) {
           
            /*
             * Search backwards though the log for a checkpoint end entry and a
             * root entry.
             */
            CheckpointFileReader searcher =
                new CheckpointFileReader(env, readBufferSize, false,
                                         info.lastUsedLsn, DbLsn.NULL_LSN,
                                         info.nextAvailableLsn);

            while (searcher.readNextEntry()) {

                /*
                 * Continue iterating until we find a checkpoint end entry.
                 * While we're at it, remember the last root seen in case we
                 * don't find a checkpoint end entry.
                 */
                if (searcher.isCheckpointEnd()) {

                    /*
                     * We're done, the checkpoint end will tell us where the
                     * root is.
                     */
                    info.checkpointEndLsn = searcher.getLastLsn();
                    break;
                } else if (searcher.isCheckpointStart()) {

                    /*
                     * Remember the first CkptStart following the CkptEnd.
                     */
                    info.partialCheckpointStartLsn = searcher.getLastLsn();

                } else if (searcher.isRoot()) {

                    /*
                     * Save the last root that was found in the log in case we
                     * don't see a checkpoint.
                     */
                    if (info.useRootLsn == DbLsn.NULL_LSN) {
                        info.useRootLsn = searcher.getLastLsn();
                    }
                }
            }
            info.nRepeatIteratorReads += searcher.getNRepeatIteratorReads();
        }

        /*
         * If we haven't found a checkpoint, we'll have to recover without
         * one. At a minimium, we must have found a root.
         */
        if (info.checkpointEndLsn == DbLsn.NULL_LSN) {
            info.checkpointStartLsn = DbLsn.NULL_LSN;
            info.firstActiveLsn = DbLsn.NULL_LSN;
        } else {
            /* Read in the checkpoint entry. */
            CheckpointEnd checkpointEnd =
                (CheckpointEnd) (env.getLogManager().get
         (info.checkpointEndLsn));
            info.checkpointEnd = checkpointEnd;
            info.checkpointStartLsn = checkpointEnd.getCheckpointStartLsn();
            info.firstActiveLsn = checkpointEnd.getFirstActiveLsn();
            if (checkpointEnd.getRootLsn() != DbLsn.NULL_LSN) {
                info.useRootLsn = checkpointEnd.getRootLsn();
            }

            /* Init the checkpointer's id sequence and FirstActiveLsn.*/
            env.getCheckpointer().setCheckpointId(checkpointEnd.getId());
            env.getCheckpointer().setFirstActiveLsn
                (checkpointEnd.getFirstActiveLsn());
        }
        if (info.useRootLsn == DbLsn.NULL_LSN) {
            throw new RecoveryException
    (env,
     "This environment's log file has no root. Since the root " +
     "is the first entry written into a log at environment " +
     "creation, this should only happen if the initial creation " +
     "of the environment was never checkpointed or synced. " +
     "Please move aside the existing log files to allow the " +
     "creation of a new environment");
        }
    }

    /**
     * Use the log to recreate an in memory tree.
     */
    private void buildTree()
        throws IOException, DatabaseException {

        inListClearCounter = 0;

        /*
         * Pass 1: Read all map database INs, find largest node id before any
         * possiblity of splits, find largest txn Id before any need for a root
         * update (which would use an AutoTxn)
         */
        Tracer.trace(Level.CONFIG, env, passStartHeader(1) + "read map INs");
  long start = System.currentTimeMillis();
        readINsAndTrackIds(info.checkpointStartLsn);
  long end = System.currentTimeMillis();
        Tracer.trace(Level.CONFIG, env, passEndHeader(1, start, end) +
         info.toString());

        /*
         * Pass 2: Read map BINDeltas.
         */
        Tracer.trace(Level.CONFIG, env, passStartHeader(2) +
                     "read map BINDeltas");
  start = System.currentTimeMillis();
        info.numOtherINs += readINs(info.checkpointStartLsn,
                                    true,   // mapping tree only
                                    LogEntryType.LOG_BIN_DELTA,
                                    null,
            null,
                                    true); // requireExactMatch
  end = System.currentTimeMillis();
        Tracer.trace(Level.CONFIG, env, passEndHeader(2, start, end) +
         info.toString());

        /*
         * Pass 3: Undo all aborted map LNs. Also, read and remember all
         * committed transaction ids.
         */
        Tracer.trace(Level.CONFIG, env, passStartHeader(3) + "undo map LNs");
  start = System.currentTimeMillis();
        Set mapLNSet = new HashSet();
        mapLNSet.add(LogEntryType.LOG_MAPLN_TRANSACTIONAL);
        mapLNSet.add(LogEntryType.LOG_TXN_COMMIT);
        mapLNSet.add(LogEntryType.LOG_TXN_ABORT);
        mapLNSet.add(LogEntryType.LOG_TXN_PREPARE);
        undoLNs(info, mapLNSet);
  end = System.currentTimeMillis();
        Tracer.trace(Level.CONFIG, env, passEndHeader(3, start, end) +
         info.toString());

        /*
         * Pass 4: Replay all mapLNs, mapping tree in place now. Use the set of
         * committed txns found from pass 3.
         */
        Tracer.trace(Level.CONFIG, env, passStartHeader(4) + "redo map LNs");
  start = System.currentTimeMillis();
        mapLNSet.add(LogEntryType.LOG_MAPLN);
        redoLNs(info, mapLNSet);
  end = System.currentTimeMillis();
        Tracer.trace(Level.CONFIG, env, passEndHeader(4, start, end) +
         info.toString());

        /*
         * Pass 5: Read all other INs.
         */
        Tracer.trace(Level.CONFIG, env, passStartHeader(5) + "read other INs");
  start = System.currentTimeMillis();
        info.numOtherINs += readINs(info.checkpointStartLsn,
                                    false,  // mapping tree only
                                    LogEntryType.LOG_IN,
                                    LogEntryType.LOG_BIN,
                                    LogEntryType.LOG_IN_DELETE_INFO,
                                    false); // requireExactMatch
  end = System.currentTimeMillis();
        Tracer.trace(Level.CONFIG, env, passEndHeader(5, start, end) +
         info.toString());

        /*
         * Pass 6: Read BIN Deltas.
         * BINDeltas must be processed after all INs so the delta is properly
         * applied to the last version. For example, suppose BINDeltas were not
         * done in a later pass, the tree is INa->BINb, and the log has
         *       INa
         *       BINDelta for BINb
         *       INa
         * the splicing in of the second INa would override the BINDelta.
         */
        Tracer.trace(Level.CONFIG, env, passStartHeader(6) + "read BINDeltas");
  start = System.currentTimeMillis();
        info.numBinDeltas = readINs(info.checkpointStartLsn,
                                    false,   // mapping tree only
                                    LogEntryType.LOG_BIN_DELTA,
                                    null,
            null,
                                    true); // requireExactMatch
  end = System.currentTimeMillis();
        Tracer.trace(Level.CONFIG, env, passEndHeader(6, start, end) +
         info.toString());

        /*
         * Pass 7: Replay DINs and DBINs.
         */
        Tracer.trace(Level.CONFIG, env, passStartHeader(7) + "read dup INs");
  start = System.currentTimeMillis();
        info.numDuplicateINs += readINs(info.checkpointStartLsn,
                                        false,   // mapping tree only
                                        LogEntryType.LOG_DIN,
                                        LogEntryType.LOG_DBIN,
                                        LogEntryType.LOG_IN_DUPDELETE_INFO,
                                        true)// requireExactMatch
  end = System.currentTimeMillis();
        Tracer.trace(Level.CONFIG, env, passEndHeader(7, start, end) +
         info.toString());

        /*
         * Pass 8: replay dup BINDeltas.
         */
        Tracer.trace(Level.CONFIG, env, passStartHeader(8) +
                     "read dup BINDeltas");
  start = System.currentTimeMillis();
        info.numBinDeltas += readINs(info.checkpointStartLsn,
                                     false,   // mapping tree only
                                     LogEntryType.LOG_DUP_BIN_DELTA,
                                     null,
             null,
                                     true);   // requireExactMatch
  end = System.currentTimeMillis();
        Tracer.trace(Level.CONFIG, env, passEndHeader(8, start, end) +
         info.toString());

        /*
         * Rebuild the in memory IN list.  Once the tree is complete we can
         * invoke the evictor.  The evictor will also be invoked during the
         * undo and redo passes.
         */
        rebuildINList();
        env.invokeEvictor();

        /*
         * Pass 9: Undo aborted LNs. No need to collect committed txn ids
         * again, was done in pass 3.
         */
        Tracer.trace(Level.CONFIG, env, passStartHeader(9) + "undo LNs");
  start = System.currentTimeMillis();
        Set lnSet = new HashSet();
        lnSet.add(LogEntryType.LOG_LN_TRANSACTIONAL);
        lnSet.add(LogEntryType.LOG_NAMELN_TRANSACTIONAL);
        lnSet.add(LogEntryType.LOG_DEL_DUPLN_TRANSACTIONAL);
        lnSet.add(LogEntryType.LOG_DUPCOUNTLN_TRANSACTIONAL);

        undoLNs(info, lnSet);
  end = System.currentTimeMillis();
        Tracer.trace(Level.CONFIG, env, passEndHeader(9, start, end) +
         info.toString());

        /* Pass 10: Replay LNs. Also read non-transactional LNs. */
        Tracer.trace(Level.CONFIG, env, passStartHeader(10) + "redo LNs");
  start = System.currentTimeMillis();
        lnSet.add(LogEntryType.LOG_LN);
        lnSet.add(LogEntryType.LOG_NAMELN);
        lnSet.add(LogEntryType.LOG_DEL_DUPLN);
        lnSet.add(LogEntryType.LOG_DUPCOUNTLN);
        lnSet.add(LogEntryType.LOG_FILESUMMARYLN);
        redoLNs(info, lnSet);
  end = System.currentTimeMillis();
        Tracer.trace(Level.CONFIG, env, passEndHeader(10, start, end) +
         info.toString());
    }

    /*
     * Read every internal node and IN DeleteInfo in the mapping tree and place
     * in the in-memory tree.
     */
    private void readINsAndTrackIds(long rollForwardLsn)
        throws IOException, DatabaseException {

        INFileReader reader =
            new INFileReader(env,
                             readBufferSize,
                             rollForwardLsn,        // start lsn
                             info.nextAvailableLsn, // end lsn
                             true,   // track node and db ids
           false,  // map db only
                             info.partialCheckpointStartLsn,
                             fileSummaryLsns);
        reader.addTargetType(LogEntryType.LOG_IN);
        reader.addTargetType(LogEntryType.LOG_BIN);
        reader.addTargetType(LogEntryType.LOG_IN_DELETE_INFO);

        try {
            info.numMapINs = 0;
            DbTree dbMapTree = env.getDbMapTree();

            /*
       * Process every IN, INDeleteInfo, and INDupDeleteInfo in the
       * mapping tree.
       */
            while (reader.readNextEntry()) {
                DatabaseId dbId = reader.getDatabaseId();
                if (dbId.equals(DbTree.ID_DB_ID)) {
                    DatabaseImpl db = dbMapTree.getDb(dbId);
                    replayOneIN(reader, db, false);
                    info.numMapINs++;
                }
            }

            /*
             * Update node id and database sequences. Use either the maximum of
             * the ids seen by the reader vs the ids stored in the checkpoint.
             */
            info.useMaxNodeId = reader.getMaxNodeId();
            info.useMaxDbId = reader.getMaxDbId();
            info.useMaxTxnId = reader.getMaxTxnId();
            if (info.checkpointEnd != null) {
                if (info.useMaxNodeId < info.checkpointEnd.getLastNodeId()) {
                    info.useMaxNodeId = info.checkpointEnd.getLastNodeId();
                }
                if (info.useMaxDbId < info.checkpointEnd.getLastDbId()) {
                    info.useMaxDbId = info.checkpointEnd.getLastDbId();
                }
                if (info.useMaxTxnId < info.checkpointEnd.getLastTxnId()) {
                    info.useMaxTxnId = info.checkpointEnd.getLastTxnId();
                }
            }

            Node.setLastNodeId(info.useMaxNodeId);
            env.getDbMapTree().setLastDbId(info.useMaxDbId);
            env.getTxnManager().setLastTxnId(info.useMaxTxnId);

            info.nRepeatIteratorReads += reader.getNRepeatIteratorReads();
        } catch (Exception e) {
            traceAndThrowException(reader.getLastLsn(), "readMapIns", e);
        }
    }

    /**
     * Read INs and process.
     */
    private int readINs(long rollForwardLsn,
                        boolean mapDbOnly,
                        LogEntryType inType1,
                        LogEntryType inType2,
                        LogEntryType inType3,
                        boolean requireExactMatch)
        throws IOException, DatabaseException {

        // don't need to track NodeIds
        INFileReader reader =
      new INFileReader(env,
                             readBufferSize,
                             rollForwardLsn,                 // startlsn
                             info.nextAvailableLsn,          // finish
                             false,
           mapDbOnly,
                             info.partialCheckpointStartLsn,
                             fileSummaryLsns);
        if (inType1 != null) {
            reader.addTargetType(inType1);
        }
        if (inType2 != null) {
            reader.addTargetType(inType2);
        }
        if (inType3 != null) {
            reader.addTargetType(inType3);
        }

        int numINsSeen = 0;
        try {

            /*
             * Read all non-provisional INs, and process if they don't belong
             * to the mapping tree.
             */
            DbTree dbMapTree = env.getDbMapTree();
            while (reader.readNextEntry()) {
                DatabaseId dbId = reader.getDatabaseId();
                boolean isMapDb = dbId.equals(DbTree.ID_DB_ID);
                boolean isTarget = false;

                if (mapDbOnly && isMapDb) {
                    isTarget = true;
                } else if (!mapDbOnly && !isMapDb) {
                    isTarget = true;
                }
                if (isTarget) {
                    DatabaseImpl db = dbMapTree.getDb(dbId);
                    if (db == null) {
                        // This db has been deleted, ignore the entry.
                    } else {
                        replayOneIN(reader, db, requireExactMatch);
                        numINsSeen++;

                        /*
                         * Add any db that we encounter IN's for because
                         * they'll be part of the in-memory tree and therefore
                         * should be included in the INList rebuild.
                         */
                        inListRebuildDbIds.add(dbId);
                    }
                }
            }

            info.nRepeatIteratorReads += reader.getNRepeatIteratorReads();
            return numINsSeen;
        } catch (Exception e) {
            traceAndThrowException(reader.getLastLsn(), "readNonMapIns", e);
            return 0;
        }
    }

    /**
     * Get an IN from the reader, set its database, and fit into tree.
     */
    private void replayOneIN(INFileReader reader,
                             DatabaseImpl db,
                             boolean requireExactMatch)
        throws DatabaseException {
       
        if (reader.isDeleteInfo()) {
            /* Last entry is a delete, replay it. */
            replayINDelete(db,
                           reader.getDeletedNodeId(),
                           false,
                           reader.getDeletedIdKey(),
                           null,
                           reader.getLastLsn());
        } else if (reader.isDupDeleteInfo()) {
            /* Last entry is a dup delete, replay it. */
            replayINDelete(db,
                           reader.getDupDeletedNodeId(),
                           true,
                           reader.getDupDeletedMainKey(),
                           reader.getDupDeletedDupKey(),
                           reader.getLastLsn());
        } else {

            /*
       * Last entry is a node, replay it. Now, we should really call
       * IN.postFetchInit, but we want to do something different from the
       * faulting-in-a-node path, because we don't want to put the IN on
       * the in memory list, and we don't want to search the db map tree,
       * so we have a IN.postRecoveryInit.  Note also that we have to
       * pass the LSN of the current log entry and also the LSN of the IN
       * in question. The only time these differ is when the log entry is
       * a BINDelta -- then the IN's LSN is the last full version LSN,
       * and the log LSN is the current log entry.
             */
            IN in = reader.getIN();
            long inLsn = reader.getLsnOfIN();
            in.postRecoveryInit(db, inLsn);
            in.latch();
            replaceOrInsert(db, in, reader.getLastLsn(), inLsn,
                            requireExactMatch);
        }

        /*
   * Although we're careful to not place INs instantiated from the log on
   * the IN list, we do call normal tree search methods when checking
   * agains the active tree. The INList builds up from the faulting in of
   * nodes this way. However, some of those nodes become obsolete as we
   * splice in newer versions, so the INList becomes too large and can
   * pose a problem by causing us to overflow memory bounds.  Some
   * possible solutions are to create an environment wide recovery mode,
   * or to put special logic into the normal faulting-in path to know
   * that we're in recovery. Because we don't want to impact normal code
   * paths, we're going to just periodically clear the INList here. The
   * INList will be regenerated at the end of recovery.
         */
        if ((++inListClearCounter % CLEAR_INCREMENT) == 0) {
            env.getInMemoryINs().clear();       
        }
    }

    /**
     * Undo all aborted LNs. To do so, walk the log backwards, keeping a
     * collection of committed txns. If we see a log entry that doesn't have a
     * committed txn, undo it.
     */
    private void undoLNs(RecoveryInfo info, Set lnTypes)
        throws IOException, DatabaseException {

  long firstActiveLsn = info.firstActiveLsn;
  long lastUsedLsn = info.lastUsedLsn;
  long endOfFileLsn = info.nextAvailableLsn;
        /* Set up a reader to pick up target log entries from the log. */
        LNFileReader reader =
            new LNFileReader(env, readBufferSize, lastUsedLsn,
                             false, endOfFileLsn, firstActiveLsn, null);

        Iterator iter = lnTypes.iterator();
        while (iter.hasNext()) {
            LogEntryType lnType = (LogEntryType) iter.next();
            reader.addTargetType(lnType);
        }

        Map countedFileSummaries = new HashMap(); // TxnNodeId -> file number
        Set countedAbortLsnNodes = new HashSet(); // set of TxnNodeId

        DbTree dbMapTree = env.getDbMapTree();
        TreeLocation location = new TreeLocation();
        try {

            /*
             * Iterate over the target LNs and commit records, constructing
             * tree.
             */
            while (reader.readNextEntry()) {
                if (reader.isLN()) {

                    /* Get the txnId from the log entry. */
                    Long txnId = reader.getTxnId();

                    /*
                     * If this node is not in a committed txn, examine it to
                     * see if it should be undone.
                     */
                    if (!committedTxnIds.contains(txnId)) {

      /*
       * Invoke the evictor to reduce memory consumption.
       */
      env.invokeEvictor();

      LN ln = reader.getLN();
      long logLsn = reader.getLastLsn();
      long abortLsn = reader.getAbortLsn();
      boolean abortKnownDeleted =
          reader.getAbortKnownDeleted();
      DatabaseId dbId = reader.getDatabaseId();
      DatabaseImpl db = dbMapTree.getDb(dbId);
                       
      /* Database may be null if it's been deleted. */
      if (db != null) {
          ln.postFetchInit(db, logLsn);
          try {
                                undo(detailedTraceLevel,
                                     db,
                                     location,
                                     ln,
                                     reader.getKey(),
                                     reader.getDupTreeKey(),
                                     logLsn,
                                     abortLsn,
                                     abortKnownDeleted,
                                     info,
                                     true);
          } finally {
        if (location.bin != null) {
            location.bin.releaseLatchIfOwner();
        }
          }
          /* Undo utilization info. */
          TxnNodeId txnNodeId =
        new TxnNodeId(reader.getNodeId(),
                txnId.longValue());
          undoUtilizationInfo(ln, logLsn, abortLsn,
            abortKnownDeleted,
            txnNodeId,
            countedFileSummaries,
            countedAbortLsnNodes);

          /*
           * Add any db that we encounter LN's for because
           * they'll be part of the in-memory tree and
           * therefore should be included in the INList
           * rebuild.
           */
          inListRebuildDbIds.add(dbId);
      }
        }
                } else if (reader.isPrepare()) {

        /*
         * The entry just read is a prepare record.  There should
         * be no lock conflicts during recovery, but just in case
         * there are, we set the locktimeout to 0.
         */
        long prepareId = reader.getTxnPrepareId();
        Long prepareIdL = new Long(prepareId);
        if (!committedTxnIds.contains(prepareIdL) &&
      !abortedTxnIds.contains(prepareIdL)) {
      TransactionConfig txnConf = new TransactionConfig();
      Txn preparedTxn = new Txn(env, txnConf, prepareId);
      preparedTxn.setLockTimeout(0);
      preparedTxns.put(prepareIdL, preparedTxn);
      env.getTxnManager().registerXATxn
          (reader.getTxnPrepareXid(), preparedTxn, true);
      Tracer.trace(Level.INFO, env,
             "Found unfinished prepare record: id: " +
             reader.getTxnPrepareId() +
             " Xid: " + reader.getTxnPrepareXid());
        }
                } else if (reader.isAbort()) {
        /* The entry just read is an abort record. */
        abortedTxnIds.add(new Long(reader.getTxnAbortId()));
    } else {
                    /* The entry just read is a commit record. */
                    committedTxnIds.add(new Long(reader.getTxnCommitId()));
                }
            }
            info.nRepeatIteratorReads += reader.getNRepeatIteratorReads();
        } catch (Exception e) {
            traceAndThrowException(reader.getLastLsn(), "undoLNs", e);
        }
    }

    /**
     * Apply all committed LNs.
     * @param rollForwardLsn start redoing from this point
     * @param lnType1 targetted LN
     * @param lnType2 targetted LN
     */
    private void redoLNs(RecoveryInfo info, Set lnTypes)
        throws IOException, DatabaseException {

  long endOfFileLsn = info.nextAvailableLsn;
  long rollForwardLsn = info.checkpointStartLsn;
        /* Set up a reader to pick up target log entries from the log */
        LNFileReader reader =
            new LNFileReader(env, readBufferSize, rollForwardLsn,
                             true, DbLsn.NULL_LSN, endOfFileLsn, null);

        Iterator iter = lnTypes.iterator();
        while (iter.hasNext()) {
            LogEntryType lnType = (LogEntryType) iter.next();
            reader.addTargetType(lnType);
        }

        Set countedAbortLsnNodes = new HashSet(); // set of TxnNodeId

        DbTree dbMapTree = env.getDbMapTree();
        TreeLocation location = new TreeLocation();
        try {

            /* Iterate over the target LNs and construct in- memory tree. */
            while (reader.readNextEntry()) {
                if (reader.isLN()) {

                    /* Get the txnId from the log entry. */
                    Long txnId = reader.getTxnId();
               
                    /*
                     * If this LN is in a committed txn, or if it's a
                     * non-transactional LN, redo it.
                     */
        boolean processThisLN = false;
        boolean lnIsCommitted = false;
        boolean lnIsPrepared = false;
        Txn preparedTxn = null;
        if (txnId == null) {
      processThisLN = true;
        } else {
      lnIsCommitted = committedTxnIds.contains(txnId);
      if (!lnIsCommitted) {
          preparedTxn = (Txn) preparedTxns.get(txnId);
          lnIsPrepared = preparedTxn != null;
      }
      if (lnIsCommitted || lnIsPrepared) {
          processThisLN = true;
      }
        }
        if (processThisLN) {

                        /* Invoke the evictor to reduce memory consumption. */
                        env.invokeEvictor();

                        LN ln = reader.getLN();
                        DatabaseId dbId = reader.getDatabaseId();
                        DatabaseImpl db = dbMapTree.getDb(dbId);
                        long logLsn = reader.getLastLsn();
                        long treeLsn = DbLsn.NULL_LSN;

                        /* Database may be null if it's been deleted. */
                        if (db != null) {
                            ln.postFetchInit(db, logLsn);

          if (preparedTxn != null) {
        preparedTxn.addLogInfo(logLsn);

        /*
         * We're reconstructing a prepared, but not
         * finished, transaction.  We know that there
         * was a write lock on this LN since it exists
         * in the log under this txnId.
         */
        preparedTxn.lock(ln.getNodeId(),
             LockType.WRITE,
             db);
        preparedTxn.setPrepared(true);
          }

                            treeLsn = redo(db,
                                           location,
                                           ln,
                                           reader.getKey(),
                                           reader.getDupTreeKey(),
                                           logLsn,
                                           info);

                            /*
                             * Add any db that we encounter LN's for because
                             * they'll be part of the in-memory tree and
                             * therefore should be included in the INList
                             * rebuild.
                             */
                            inListRebuildDbIds.add(dbId);
                        }

                        /* Redo utilization info. */
                        TxnNodeId txnNodeId = null;
                        if (txnId != null) {
                            txnNodeId = new TxnNodeId(reader.getNodeId(),
                                                      txnId.longValue());
                        }
                        redoUtilizationInfo(logLsn, treeLsn,
                                            reader.getAbortLsn(),
                                            reader.getAbortKnownDeleted(),
                                            ln, txnNodeId,
                                            countedAbortLsnNodes);
                    }
                }
            }
            info.nRepeatIteratorReads += reader.getNRepeatIteratorReads();
        } catch (Exception e) {
            traceAndThrowException(reader.getLastLsn(), "redoLns", e);
        }
    }

    /**
     * Rebuild the in memory inList with INs that have been made resident by
     * the recovery process.
     */
    private void rebuildINList()
        throws DatabaseException {

        env.getInMemoryINs().clear();               // empty out
        env.getDbMapTree().rebuildINListMapDb();    // scan map db

        /* For all the dbs that we read in recovery, scan for resident INs. */
        Iterator iter = inListRebuildDbIds.iterator();
        while (iter.hasNext()) {
            DatabaseId dbId = (DatabaseId) iter.next();
            /* We already did the map tree, don't do it again. */
            if (!dbId.equals(DbTree.ID_DB_ID)) {
                DatabaseImpl db = env.getDbMapTree().getDb(dbId);
                db.getTree().rebuildINList();
            }
        }
    }

    /* Struct to hold a nodeId/txnId tuple */
    private static class TxnNodeId {
        long nodeId;
        long txnId;
       
        TxnNodeId(long nodeId, long txnId) {
            this.nodeId = nodeId;
            this.txnId = txnId;
        }

        /**
         * Compare two TxnNodeId objects
         */
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }

            if (!(obj instanceof TxnNodeId)) {
                return false;
            }

            return ((((TxnNodeId) obj).txnId == txnId) &&
                    (((TxnNodeId) obj).nodeId == nodeId));
        }

        public int hashCode() {
            return (int) (txnId + nodeId);
        }

        public String toString() {
            return "txnId=" + txnId + "/nodeId=" + nodeId;
        }
    }

    /*
     * Tree manipulation methods.
     */

    /**
     * Recover an internal node. If inFromLog is:
     *       - not found, insert it in the appropriate location.
     *       - if found and there is a physical match (LSNs are the same)
     *         do nothing.
     *       - if found and there is a logical match (LSNs are different,
     *         another version of this IN is in place, replace the found node
     *         with the node read from the log only if the log version's
     *         LSN is greater.
     * InFromLog should be latched upon entering this method and it will
     * not be latched upon exiting.
     *
     * @param inFromLog - the new node to put in the tree.  The identifier key
     * and node id are used to find the existing version of the node.
     * @param logLsn - the location of log entry in in the log.
     * @param inLsn LSN of this in -- may not be the same as the log LSN if
     * the current entry is a BINDelta
     * @param requireExactMatch - true if we won't place this node in the tree
     * unless we find exactly that parent. Used for BINDeltas, where we want
     * to only apply the BINDelta to that exact node.
     */
    private void replaceOrInsert(DatabaseImpl db,
                                 IN inFromLog,
                                 long logLsn,
                                 long inLsn,
                                 boolean requireExactMatch)
        throws DatabaseException {

        List trackingList = null;
        try {

            /*
             * We must know a priori if this node is the root. We can't infer
             * that status from a search of the existing tree, because
             * splitting the root is done by putting a node above the old root.
             * A search downward would incorrectly place the new root below the
             * existing tree.
             */
            if (inFromLog.isRoot()) {
                if (inFromLog.containsDuplicates()) {
                    replaceOrInsertDuplicateRoot(db, (DIN) inFromLog, logLsn);
                } else {
                    replaceOrInsertRoot(db, inFromLog, logLsn);
                }
            } else {
                /*
                 * Look for a parent. The call to getParentNode unlatches node.
                 * Then place inFromLog in the tree if appropriate.
                 */
                trackingList = new ArrayList();
                replaceOrInsertChild(db, inFromLog, logLsn, inLsn,
                                     trackingList, requireExactMatch);
            }
        } catch (Exception e) {
            String trace = printTrackList(trackingList);
            Tracer.trace(db.getDbEnvironment(), "RecoveryManager",
                         "replaceOrInsert", " lsnFromLog:" +
                         DbLsn.getNoFormatString(logLsn) + " " + trace,
                         e);
            throw new DatabaseException("lsnFromLog=" +
                                        DbLsn.getNoFormatString(logLsn), e);
        } finally {
            inFromLog.releaseLatchIfOwner();
       
            assert (Latch.countLatchesHeld() == 0):
                Latch.latchesHeldToString() +
                "LSN = " + DbLsn.toString(logLsn) +
                " inFromLog = " + inFromLog.getNodeId();

        }
    }

    /**
     * Dump a tracking list into a string.
     */
    private String printTrackList(List trackingList) {
        if (trackingList != null) {
            StringBuffer sb = new StringBuffer();
            Iterator iter = trackingList.iterator();
            sb.append("Trace list:");
            sb.append('\n');
            while (iter.hasNext()) {
                sb.append((TrackingInfo) iter.next());
                sb.append('\n');
            }
            return sb.toString();
        } else {
            return null;
        }
    }

    /**
     * Replay an IN delete. Remove an entry from an IN to reflect a reverse
     * split.
     */
    private void replayINDelete(DatabaseImpl db,
        long nodeId,
                                boolean containsDuplicates,
        byte[] mainKey,
        byte[] dupKey,
                                long logLsn)
        throws DatabaseException {

        boolean found = false;
        boolean deleted = false;
        Tree tree = db.getTree();
        SearchResult result = new SearchResult();

        try {
            /* Search for the parent of this target node. */
            result = db.getTree().getParentINForChildIN
                (nodeId,
                 containsDuplicates,
                 false, // do not stop at dup tree root
                 mainKey,
                 dupKey,
                 false, // requireExactMatch
                 false, // updateGeneration
                 -1,    // targetLevel
                 null,  // trackingList
                 true); // doFetch

            if (result.parent == null) {
                /* It's null -- we actually deleted the root. */
                tree.withRootLatched(new RootDeleter(tree));
                DbTree dbTree = db.getDbEnvironment().getDbMapTree();
                dbTree.modifyDbRoot(db);
                traceRootDeletion(Level.FINE, db);
                deleted = true;
            } else if (result.exactParentFound) {
                /* Exact match was found -- delete the parent entry. */
                found = true;
                deleted = result.parent.deleteEntry(result.index, false);
            }
        } finally {
            if (result.parent != null) {
                result.parent.releaseLatch();
            }

            traceINDeleteReplay
                (nodeId, logLsn, found, deleted, result.index,
                 containsDuplicates);
        }
    }

    /*
     * RootDeleter lets us clear the rootIN within the root latch.
     */
    private static class RootDeleter implements WithRootLatched {
        Tree tree;
        RootDeleter(Tree tree) {
            this.tree = tree;
        }

        /**
         * @return true if the in-memory root was replaced.
         */
        public IN doWork(ChildReference root)
            throws DatabaseException {

            tree.setRoot(null);
            return null;
        }
    }

    /**
     * If the root of this tree is null, use this IN from the log as a root.
     * Note that we should really also check the LSN of the mapLN, because
     * perhaps the root is null because it's been deleted. However, the replay
     * of all the LNs will end up adjusting the tree correctly.
     *
     * If there is a root, check if this IN is a different LSN and if so,
     * replace it.
     */
    private void replaceOrInsertRoot(DatabaseImpl db, IN inFromLog, long lsn)
        throws DatabaseException {

        boolean success = true;
        Tree tree = db.getTree();
        RootUpdater rootUpdater = new RootUpdater(tree, inFromLog, lsn);
        try {
            /* Run the root updater while the root latch is held. */
            tree.withRootLatched(rootUpdater);

            /* Update the mapLN if necessary */
            if (rootUpdater.updateDone()) {
                EnvironmentImpl env = db.getDbEnvironment();
                env.getDbMapTree().modifyDbRoot(db);
            }
        } catch (Exception e) {
            success = false;
            throw new DatabaseException("lsnFromLog=" +
                                        DbLsn.getNoFormatString(lsn),
                                        e);
        } finally {
            trace(detailedTraceLevel,
                  db, TRACE_ROOT_REPLACE, success, inFromLog,
                  lsn,
                  null,
                  true,
                  rootUpdater.getReplaced(),
                  rootUpdater.getInserted(),
                  rootUpdater.getOriginalLsn(),
      DbLsn.NULL_LSN,
      -1);
        }
    }

    /*
     * RootUpdater lets us replace the tree root within the tree root latch.
     */
    private static class RootUpdater implements WithRootLatched {
        private Tree tree;
        private IN inFromLog;
        private long lsn = DbLsn.NULL_LSN;
        private boolean inserted = false;
        private boolean replaced = false;
        private long originalLsn = DbLsn.NULL_LSN;

        RootUpdater(Tree tree, IN inFromLog, long lsn) {
            this.tree = tree;
            this.inFromLog = inFromLog;
            this.lsn = lsn;
        }

        /**
         * @return true if the in-memory root was replaced.
         */
        public IN doWork(ChildReference root)
            throws DatabaseException {

            ChildReference newRoot =
                new ChildReference(inFromLog, new byte[0], lsn);
            inFromLog.releaseLatch();

            if (root == null) {
                tree.setRoot(newRoot);
                inserted = true;
            } else {
                originalLsn = root.getLsn(); // for debugLog

                /*
                 * The current in-memory root IN is older than the root IN from
                 * the log.
                 */
                if (DbLsn.compareTo(originalLsn, lsn) < 0) {
                    tree.setRoot(newRoot);
                    replaced = true;
                }
            }
            return null;
        }

        boolean updateDone() {
            return inserted || replaced;
        }

        boolean getInserted() {
            return inserted;
        }

        boolean getReplaced() {
            return replaced;
        }

        long getOriginalLsn() {
            return originalLsn;
        }
    }

    /**
     * Recover this root of a duplicate tree.
     */
    private void replaceOrInsertDuplicateRoot(DatabaseImpl db,
                                              DIN inFromLog,
                                              long lsn)
        throws DatabaseException {

        boolean found = true;
        boolean inserted = false;
        boolean replaced = false;
        long originalLsn = DbLsn.NULL_LSN;

        byte[] mainTreeKey = inFromLog.getMainTreeKey();
        IN parent = null;
        int index = -1;
        boolean success = false;
        try {

            /*
             * Allow splits since the parent BIN of this DIN may be full.
             * [#13435]
             */
            parent = db.getTree().searchSplitsAllowed
                (mainTreeKey, -1, true /*updateGeneration*/);
            assert parent instanceof BIN;

      ChildReference newRef =
    new ChildReference(inFromLog, mainTreeKey, lsn);
      index = parent.insertEntry1(newRef);
      if ((index >= 0 &&
     (index & IN.EXACT_MATCH) != 0)) {

    index &= ~IN.EXACT_MATCH;

    /*
     * Replace whatever's at this entry, whether it's an LN or an
     * earlier root DIN as long as one of the following is true:
     *
     * - the entry is known deleted
     * - or the LSN is earlier than the one we've just read from
     *     the log.
     */
    if (parent.isEntryKnownDeleted(index)) {
        /* Be sure to clear the known deleted bit. */
        parent.setEntry(index, inFromLog, mainTreeKey,
            lsn, (byte) 0);
        replaced = true;
    } else {
        originalLsn = parent.getLsn(index);
        if (DbLsn.compareTo(originalLsn, lsn) < 0) {
      parent.setEntry(index, inFromLog, mainTreeKey, lsn,
          parent.getState(index));
      replaced = true;
        }
    }
      } else {
    found = false;
      }
      success = true;
        } finally {
            if (parent != null) {
                parent.releaseLatch();
            }
            trace(detailedTraceLevel,
                  db,
                  TRACE_DUP_ROOT_REPLACE, success, inFromLog,
                  lsn, parent, found,
                  replaced, inserted, originalLsn, DbLsn.NULL_LSN, index);
        }
    }

    /**
     * Decide whether to insert this IN from the log, or replace an existing
     * one.
     * @param db owning database
     * @param inFromLog IN which was instantiated from the log.
     * @param logLsn LSN of this entry
     * @param inLsn LSN of this in -- may not be the same as the log LSN if
     * the current entry is a BINDelta
     * @param trackingList debugging aid to save the path traversed by this
     * recovery call
     */
    private void replaceOrInsertChild(DatabaseImpl db,
                                      IN inFromLog,
                                      long logLsn,
                                      long inLsn,
                                      List trackingList,
                                      boolean requireExactMatch)
        throws DatabaseException {

        boolean inserted = false;
        boolean replaced = false;
        long originalLsn = DbLsn.NULL_LSN;
        boolean success = false;
        SearchResult result = new SearchResult();
        try {
            result = db.getTree().getParentINForChildIN
                (inFromLog,
                 requireExactMatch,
                 false, // updateGeneration
                 -1,    // targetLevel
                 trackingList);

            /*
             * Does inFromLog exist in this parent?
             *
             * 1. No possible parent -- skip this child. It's represented
             *    by a parent that's later in the log.
             * 2. No match, but a possible parent: insert inFromLog. This
             *    is the case when a tree is bootstrapped, and the root is
             *    written w/0 children.
             * 3. physical match: (LSNs same) this LSN is already in place,
             *                    do nothing.
             * 4. logical match: another version of this IN is in place.
             *                   Replace child with inFromLog if inFromLog's
             *                   LSN is greater.
             */
            if (result.parent == null) {
                return// case 1, no possible parent.
            }
           
            byte[] idKey = result.parent.getChildKey(inFromLog);

            /* Get the key that will locate inFromLog in this parent. */
            if (result.index >= 0) {
                if (result.parent.getLsn(result.index) == logLsn) {
                    /* case 3: do nothing */

                } else {

                    /*
                     * Not an exact physical match, now need to look at child.
                     */
                    if (result.exactParentFound) {
                        originalLsn = result.parent.getLsn(result.index);
                       
                        /* case 4: It's a logical match, replace */
                        if (DbLsn.compareTo(originalLsn, logLsn) < 0) {

                            /*
           * It's a logical match, replace. Put the child
           * node reference into the parent, as well as the
           * true LSN of the IN. (If this entry is a
           * BINDelta, the node has been updated with all the
           * deltas, but the LSN we want to put in should be
           * the last full LSN, not the LSN of the BINDelta)
                             */
                            result.parent.updateEntry(result.index,
                                                      inFromLog,
                                                      inLsn);
                            replaced = true;
                        }
                    } else {
                        /* case 2: no match, insert the new node. */
                        ChildReference ref =
                            new ChildReference(inFromLog, idKey, inLsn);
                        boolean insertOk = result.parent.insertEntry(ref);
                        assert insertOk:
                            "Nomatch, couln't insert for LSN " +
          DbLsn.toString(logLsn) +
                            " parent=" +  result.parent.getNodeId() +
                            " index=" + result.index;
                        inserted = true;
                    }
                }
            } else {
                /* case 2: no match */
                ChildReference newRef =
                    new ChildReference(inFromLog, idKey, inLsn);
                boolean insertOk = result.parent.insertEntry(newRef);
                assert insertOk;
                inserted = true;
            }
            success = true;
        } finally {
            if (result.parent != null) {
                result.parent.releaseLatch();
            }
            trace(detailedTraceLevel, db,
                  TRACE_IN_REPLACE, success, inFromLog,
                  logLsn, result.parent,
                  result.exactParentFound, replaced, inserted,
                  originalLsn, DbLsn.NULL_LSN, result.index);
        }
    }

    /**
     * Redo a committed LN for recovery.
     *
     * <pre>
     * log LN found  | logLSN > LSN | LN is deleted | action
     *   in tree     | in tree      |               |
     * --------------+--------------+---------------+------------------------
     *     Y         |    N         |    n/a        | no action
     * --------------+--------------+---------------+------------------------
     *     Y         |    Y         |     N         | replace w/log LSN
     * --------------+--------------+---------------+------------------------
     *     Y         |    Y         |     Y         | replace w/log LSN, put
     *               |              |               | on compressor queue
     * --------------+--------------+---------------+------------------------
     *     N         |    n/a       |     N         | insert into tree
     * --------------+--------------+---------------+------------------------
     *     N         |    n/a       |     Y         | no action
     * --------------+--------------+---------------+------------------------
     *
     * </pre>
     *
     * Whatever the outcome, the BIN held in location is unlatched (if this
     * thread owns the latch) and the result is traced before returning.
     *
     * @param db the database that owns the tree being rebuilt.
     * @param location holds state about the search in the tree. Passed
     *  in from the recovery manager to reduce object creation overhead.
     * @param lnFromLog - the new node to put in the tree.
     * @param mainKey is the key that navigates us through the main tree
     * @param dupKey is the key that navigates us through the duplicate
     * tree
     * @param logLsn is the LSN from the just-read log entry
     * @param info is a recovery stats object.
     * @return the LSN found in the tree (location.childLsn), or
     * DbLsn.NULL_LSN if the LN was not found.
     */
    private long redo(DatabaseImpl db,
          TreeLocation location,
          LN lnFromLog,
          byte[] mainKey,
          byte[] dupKey,
          long logLsn,
          RecoveryInfo info)
        throws DatabaseException {

        /* Outcome flags, reported by the trace call in the finally block. */
        boolean found = false;
        boolean replaced = false;
        boolean inserted = false;
        boolean success = false;
        try {

            /*
             *  Find the BIN which is the parent of this LN.
             */
            location.reset();
            found = db.getTree().getParentBINForChildLN
                (location, mainKey, dupKey, lnFromLog,
                 true,  // splitsAllowed
                 false, // findDeletedEntries
                 true,  // searchDupTree
                 true); // updateGeneration

            if (!found && (location.bin == null)) {

                /*
                 * There is no possible parent for this LN. This tree was
                 * probably compressed away.
                 */
                success = true;
                return DbLsn.NULL_LSN;
            }

            /*
             * Now we're at the parent for this LN, whether BIN, DBIN or DIN
             */
            if (lnFromLog.containsDuplicates()) {
                if (found) {

                    /*
                     * This is a dupCountLN. It's ok if there's no DIN parent
                     * for it. [#11307].
                     */
        DIN duplicateRoot = (DIN)
      location.bin.fetchTarget(location.index);
        if (DbLsn.compareTo(logLsn, location.childLsn) >= 0) {
      /* DupCountLN needs replacing. */
      duplicateRoot.updateDupCountLNRefAndNullTarget(logLsn);
        }
                }
            } else {
                if (found) {

                    /*
                     * This LN is in the tree. See if it needs replacing.
                     */
                    info.lnFound++;

                    /* Replace only when the log version is strictly newer. */
                    if (DbLsn.compareTo(logLsn, location.childLsn) > 0) {
                        info.lnReplaced++;
                        replaced = true;

                        /*
                         * Be sure to make the target null. We don't want this
                         * new LN resident, it will make recovery start
                         * dragging in the whole tree and will consume too much
                         * memory.
                         */
                        location.bin.updateEntry(location.index,
                                                 null,
                                                 logLsn);
                    }

                    /*
                     * If the entry in the tree is deleted, put it on the
                     * compressor queue.  Set KnownDeleted to prevent fetching
                     * a cleaned LN. Unlike the replacement above, this also
                     * applies when the tree already holds this exact LSN
                     * (comparison is >=, not >).
                     */
                    if (DbLsn.compareTo(logLsn, location.childLsn) >= 0 &&
                        lnFromLog.isDeleted()) {
                        location.bin.setKnownDeletedLeaveTarget
                            (location.index);
                        byte[] deletedKey = location.bin.containsDuplicates() ?
                            dupKey : mainKey;

                        /*
                         * In the case of SR 8984, the LN has no data and
                         * therefore no valid delete key. Don't compress.
                         */
                        if (deletedKey != null) {
                            db.getDbEnvironment().addToCompressorQueue
                                (location.bin,
                                 new Key(deletedKey),
                                 false); // don't wakeup compressor
                        }
                    }

                } else {
                    /*
                     * This LN is not in the tree. If it's not deleted, insert
                     * it.
                     */
                    info.lnNotFound++;
                    if (!lnFromLog.isDeleted()) {
                        info.lnInserted++;
                        inserted = true;
                        boolean insertOk =
          insertRecovery(db, location, logLsn);
                        assert insertOk;
                    }
                }
            }
            success = true;
            return found ? location.childLsn : DbLsn.NULL_LSN;
        } finally {
            /* Runs on every exit path, including the early return above. */
            if (location.bin != null) {
                location.bin.releaseLatchIfOwner();
            }
            trace(detailedTraceLevel, db,
                  TRACE_LN_REDO, success, lnFromLog,
                  logLsn, location.bin, found,
                  replaced, inserted,
                  location.childLsn, DbLsn.NULL_LSN, location.index);
        }
    }

    /**
     * Undo the changes to this node. Here are the rules that govern the action
     * taken.
     *
     * <pre>
     *
     * found LN in  | abortLsn is | logLsn ==       | action taken
     *    tree      | null        | LSN in tree     | by undo
     * -------------+-------------+----------------------------------------
     *      Y       |     N       |      Y          | replace w/abort LSN
     * ------------ +-------------+-----------------+-----------------------
     *      Y       |     Y       |      Y          | remove from tree
     * ------------ +-------------+-----------------+-----------------------
     *      Y       |     N/A     |      N          | no action
     * ------------ +-------------+-----------------+-----------------------
     *      N       |     N/A     |    N/A          | no action (*)
     * (*) If this key is not present in the tree, this record doesn't
     * reflect the IN state of the tree and this log entry is not applicable.
     *
     * </pre>
     * @param location holds state about the search in the tree. Passed
     *  in from the recovery manager to reduce objection creation overhead.
     * @param lnFromLog - the new node to put in the tree.
     * @param mainKey is the key that navigates us through the main tree
     * @param dupTreeKey is the key that navigates us through the duplicate
     *                   tree
     * @param logLsn is the LSN from the just-read log entry
     * @param abortLsn gives us the location of the original version of the
     *                 node
     * @param info is a recovery stats object.
     */
    public static void undo(Level traceLevel,
                            DatabaseImpl db,
                            TreeLocation location,
                            LN lnFromLog,
                            byte[] mainKey,
                            byte[] dupKey,
                            long logLsn,
                            long abortLsn,
                            boolean abortKnownDeleted,
                            RecoveryInfo info,
                            boolean splitsAllowed)
        throws DatabaseException {

        boolean found = false;
        boolean replaced = false;
        boolean success = false;

        try {

            /*
             * Find the BIN which is the parent of this LN.
             */
            location.reset();
            found = db.getTree().getParentBINForChildLN
                (location, mainKey, dupKey, lnFromLog, splitsAllowed,
                 true,  // findDeletedEntries
                 false, // searchDupTree
                 true); // updateGeneration

            /*
             * Now we're at the rightful parent, whether BIN or DBIN.
             */
            if (lnFromLog.containsDuplicates()) {

                /*
     * This is a dupCountLN. It's ok if there's no DIN parent
     * for it. [#11307].
     */
                if (found) {
        DIN duplicateRoot = (DIN)
      location.bin.fetchTarget(location.index);
        if (DbLsn.compareTo(logLsn, location.childLsn) == 0) {
      /* DupCountLN needs replacing. */
      duplicateRoot.
          updateDupCountLNRefAndNullTarget(abortLsn);
      replaced = true;
        }
    }
            } else {
                if (found) {
                    /* This LN is in the tree. See if it needs replacing. */
                    if (info != null) {
                        info.lnFound++;
                    }
        boolean updateEntry =
      DbLsn.compareTo(logLsn, location.childLsn) == 0;
        if (updateEntry) {
      if (abortLsn == DbLsn.NULL_LSN) {

          /*
           * To undo a node that was created by this txn,
           * remove it.  If this entry is deleted, put it on
                             * the compressor queue.  Set KnownDeleted to
                             * prevent fetching a cleaned LN.
           */
          location.bin.
        setKnownDeletedLeaveTarget(location.index);
                            byte[] deletedKey =
                                location.bin.containsDuplicates() ?
                                dupKey : mainKey;
          db.getDbEnvironment().addToCompressorQueue
        (location.bin,
                                 new Key(deletedKey),
                                 false); // don't wakeup compressor
                           
      } else {

          /*
           * Apply the log record by updating the in memory
           * tree slot to contain the abort LSN and abort
           * Known Deleted flag.
           */
          if (info != null) {
        info.lnReplaced++;
          }
          replaced = true;
          location.bin.updateEntry(location.index,
                 null,
                 abortLsn);
          if (abortKnownDeleted) {
        location.bin.setKnownDeleted(location.index);
          } else {
        location.bin.clearKnownDeleted(location.index);
          }
      }

                        /*
                         * We must clear the PendingDeleted flag for
                         * non-deleted entries.  Clear it unconditionally,
                         * since KnownDeleted will be set above for a deleted
                         * entry. [#12885]
                         */
                        location.bin.clearPendingDeleted(location.index);
        }

                } else {

                    /*
                     * This LN is not in the tree.  Just make a note of it.
                     */
                    if (info != null) {
                        info.lnNotFound++;
                    }
                }
            }

            success = true;
        } finally {
            /*
             * Note that undo relies on the caller to unlatch the bin.  Not
             * ideal, done in order to support abort processing.
             */
            trace(traceLevel, db, TRACE_LN_UNDO, success, lnFromLog,
      logLsn, location.bin, found, replaced, false,
      location.childLsn, abortLsn, location.index);
        }
    }

    /**
     * Inserts a LN into the tree for recovery redo processing.  In this
     * case, we know we don't have to lock when checking child LNs for deleted
     * status (there can be no other thread running on this tree) and we don't
     * have to log the new entry. (it's in the log already)
     *
     * @param db
     * @param location this embodies the parent bin, the index, the key that
     * represents this entry in the bin.
     * @param logLsn LSN of this current ln
     * @param key to use when creating a new ChildReference object.
     * @return true if LN was inserted, false if it was a duplicate
     * duplicate or if an attempt was made to insert a duplicate when
     * allowDuplicates was false.
     */
    private static boolean insertRecovery(DatabaseImpl db,
                                          TreeLocation location,
                                          long logLsn)
        throws DatabaseException {
       
        /* Make a child reference as a candidate for insertion. */
        ChildReference newLNRef =
      new ChildReference(null, location.lnKey, logLsn);

        BIN parentBIN = location.bin;
        int entryIndex = parentBIN.insertEntry1(newLNRef);

        if ((entryIndex & IN.INSERT_SUCCESS) == 0) {

            /*
       * Entry may have been a duplicate. Insertion was not successful.
       */
            entryIndex &= ~IN.EXACT_MATCH;

            boolean canOverwrite = false;
            if (parentBIN.isEntryKnownDeleted(entryIndex)) {
                canOverwrite = true;
            } else {

                /*
                 * Read the LN that's in this slot to check for deleted
                 * status.  No need to lock, since this is recovery.  If
                 * fetchTarget returns null, a deleted LN was cleaned.
                 */
                LN currentLN = (LN) parentBIN.fetchTarget(entryIndex);

                if (currentLN == null || currentLN.isDeleted()) {
                    canOverwrite = true;
                }

                /*
     * Evict the target again manually, to reduce memory
     * consumption while the evictor is not running.
                 */
    parentBIN.updateEntry(entryIndex, null);
            }

            if (canOverwrite) {
                parentBIN.updateEntry(entryIndex, null, logLsn,
                                      location.lnKey);
                parentBIN.clearKnownDeleted(entryIndex);
                location.index = entryIndex;
                return true;
            } else {
                return false;
            }
        }
        location.index = entryIndex & ~IN.INSERT_SUCCESS;
        return true;
    }

    /**
     * Update file utilization info during redo.
     */
    private void redoUtilizationInfo(long logLsn,
             long treeLsn,
                                     long abortLsn,
             boolean abortKnownDeleted,
                                     LN ln,
             TxnNodeId txnNodeId,
                                     Set countedAbortLsnNodes) {

        UtilizationTracker tracker = env.getUtilizationTracker();

        /*
         * If the LN is marked deleted and its LSN follows the FileSummaryLN
         * for its file, count it as obsolete.
         */
        if (ln.isDeleted()) {
            Long logFileNum = new Long(DbLsn.getFileNumber(logLsn));
            long fileSummaryLsn =
    DbLsn.longToLsn((Long) fileSummaryLsns.get(logFileNum));
            int cmpFsLsnToLogLsn =
    (fileSummaryLsn != DbLsn.NULL_LSN) ?
    DbLsn.compareTo(fileSummaryLsn, logLsn) : -1;
            if (cmpFsLsnToLogLsn < 0) {
                tracker.countObsoleteNode(logLsn, null);
            }
        }

        /* Was the LN found in the tree? */
        if (treeLsn != DbLsn.NULL_LSN) {
            int cmpLogLsnToTreeLsn = DbLsn.compareTo(logLsn, treeLsn);

            /*
             * If the oldLsn and newLsn differ and the newLsn follows the
             * FileSummaryLN for the file of the oldLsn, count the oldLsn as
             * obsolete.
             */
            if (cmpLogLsnToTreeLsn != 0) {
                long newLsn = (cmpLogLsnToTreeLsn < 0) ? treeLsn : logLsn;
                long oldLsn = (cmpLogLsnToTreeLsn > 0) ? treeLsn : logLsn;
                Long oldLsnFile = new Long(DbLsn.getFileNumber(oldLsn));
    long oldFsLsn =
        DbLsn.longToLsn((Long) fileSummaryLsns.get(oldLsnFile));
                int cmpOldFsLsnToNewLsn =
        (oldFsLsn != DbLsn.NULL_LSN) ?
        DbLsn.compareTo(oldFsLsn, newLsn) : -1;
                if (cmpOldFsLsnToNewLsn < 0) {
                    tracker.countObsoleteNode(oldLsn, null);
                }
            }

            /*
             * If the logLsn is equal to or precedes the treeLsn and the entry
             * has an abortLsn that was not previously deleted, consider the
             * set of entries for the given node.  If the logLsn is the first
             * in the set that follows the FileSummaryLN of the abortLsn, count
             * the abortLsn as obsolete.
             */
            if (cmpLogLsnToTreeLsn <= 0 &&
                abortLsn != DbLsn.NULL_LSN &&
                !abortKnownDeleted &&
                !countedAbortLsnNodes.contains(txnNodeId)) {
                /* We have not counted this abortLsn yet. */
                Long abortFileNum = new Long(DbLsn.getFileNumber(abortLsn));
    long abortFsLsn =
        DbLsn.longToLsn((Long) fileSummaryLsns.get(abortFileNum));
                int cmpAbortFsLsnToLogLsn =
        (abortFsLsn != DbLsn.NULL_LSN) ?
        DbLsn.compareTo(abortFsLsn, logLsn) : -1;
                if (cmpAbortFsLsnToLogLsn < 0) {

                    /*
                     * logLsn follows the FileSummaryLN of the abortLsn.  The
                     * abortLsn is only an approximation of the prior LSN, so
                     * use inexact counting.
                     */
                    tracker.countObsoleteNodeInexact(abortLsn, null);

                    /* Don't count this abortLsn (this node) again. */
                    countedAbortLsnNodes.add(txnNodeId);
                }
            }
        }
    }

    /**
     * Update file utilization info during recovery undo (not abort undo).
     */
    private void undoUtilizationInfo(LN ln,
             long logLsn,
             long abortLsn,
                                     boolean abortKnownDeleted,
                                     TxnNodeId txnNodeId,
                                     Map countedFileSummaries,
                                     Set countedAbortLsnNodes) {

        UtilizationTracker tracker = env.getUtilizationTracker();

        /* Compare the fileSummaryLsn to the logLsn. */
        Long logFileNum = new Long(DbLsn.getFileNumber(logLsn));
        long fileSummaryLsn =
            DbLsn.longToLsn((Long) fileSummaryLsns.get(logFileNum));
        int cmpFsLsnToLogLsn = (fileSummaryLsn != DbLsn.NULL_LSN) ?
            DbLsn.compareTo(fileSummaryLsn, logLsn) : -1;

        /*
         * Count the logLsn as obsolete if it follows the FileSummaryLN for the
         * file of its Lsn.
         */
        if (cmpFsLsnToLogLsn < 0) {
            tracker.countObsoleteNode(logLsn, null);
        }

        /*
         * Consider the latest LSN for the given node that precedes the
         * FileSummaryLN for the file of its LSN.  Count this LSN as obsolete
         * if it is not a deleted LN.
         */
        if (cmpFsLsnToLogLsn > 0) {
            Long countedFile = (Long) countedFileSummaries.get(txnNodeId);
            if (countedFile == null ||
                countedFile.longValue() > logFileNum.longValue()) {

                /*
                 * We encountered a new file number and the FsLsn follows the
                 * logLsn.
                 */
                if (!ln.isDeleted()) {
                    tracker.countObsoleteNode(logLsn, null);
                }
                /* Don't count this file again. */
                countedFileSummaries.put(txnNodeId, logFileNum);
            }
        }
    }

    /**
     * Concoct a header for the recovery pass trace info.
     */
    private String passStartHeader(int passNum) {
        return "Recovery Pass " + passNum + " start: ";
    }

    /**
     * Concoct a header for the recovery pass trace info.
     */
    private String passEndHeader(int passNum, long start, long end) {
        return "Recovery Pass " + passNum + " end (" +
            (end-start) + "): ";
    }

    /**
     * Send trace messages to the java.util.logger. Don't rely on the logger
     * alone to conditionalize whether we send this message, we don't even want
     * to construct the message if the level is not enabled. This is used to
     * construct verbose trace messages for individual log entry processing.
     */
    private static void trace(Level level,
                              DatabaseImpl database,
                              String debugType,
                              boolean success,
                              Node node,
                              long logLsn,
                              IN parent,
                              boolean found,
                              boolean replaced,
                              boolean inserted,
                              long replacedLsn,
            long abortLsn,
            int index) {
        Logger logger = database.getDbEnvironment().getLogger();
        Level useLevel= level;
        if (!success) {
            useLevel = Level.SEVERE;
        }
        if (logger.isLoggable(useLevel)) {
            StringBuffer sb = new StringBuffer();
            sb.append(debugType);
            sb.append(" success=").append(success);
            sb.append(" node=");
            sb.append(node.getNodeId());
            sb.append(" lsn=");
            sb.append(DbLsn.getNoFormatString(logLsn));
            if (parent != null) {
                sb.append(" parent=").append(parent.getNodeId());
            }
            sb.append(" found=");
            sb.append(found);
            sb.append(" replaced=");
            sb.append(replaced);
            sb.append(" inserted=");
            sb.append(inserted);
            if (replacedLsn != DbLsn.NULL_LSN) {
                sb.append(" replacedLsn=");
                sb.append(DbLsn.getNoFormatString(replacedLsn));
            }
            if (abortLsn != DbLsn.NULL_LSN) {
                sb.append(" abortLsn=");
                sb.append(DbLsn.getNoFormatString(abortLsn));
            }
            sb.append(" index=").append(index);
            logger.log(useLevel, sb.toString());
        }
    }

    /**
     * Send trace messages to the java.util.logger. Don't rely on the logger
     * alone to conditionalize whether we send this message, we don't even want
     * to construct the message if the level is not enabled.
     */
    private void traceINDeleteReplay(long nodeId,
                                     long logLsn,
                                     boolean found,
                                     boolean deleted,
                                     int index,
             boolean isDuplicate) {
        Logger logger = env.getLogger();
        if (logger.isLoggable(detailedTraceLevel)) {
            StringBuffer sb = new StringBuffer();
            sb.append((isDuplicate) ?
          TRACE_IN_DUPDEL_REPLAY :
          TRACE_IN_DEL_REPLAY);
            sb.append(" node=").append(nodeId);
            sb.append(" lsn=").append(DbLsn.getNoFormatString(logLsn));
            sb.append(" found=").append(found);
            sb.append(" deleted=").append(deleted);
            sb.append(" index=").append(index);
            logger.log(detailedTraceLevel, sb.toString());
        }
    }

    private void traceAndThrowException(long badLsn,
          String method,
          Exception originalException)
        throws DatabaseException {
        String badLsnString = DbLsn.getNoFormatString(badLsn);
        Tracer.trace(env,
                     "RecoveryManager",
                     method,
                     "last LSN = " + badLsnString,
                     originalException);
        throw new DatabaseException("last LSN=" + badLsnString,
                                    originalException);
    }

    /**
     * Log trace information about root deletions, called by INCompressor and
     * recovery.
     */
    public static void traceRootDeletion(Level level, DatabaseImpl database) {
        Logger logger = database.getDbEnvironment().getLogger();
        if (logger.isLoggable(level)) {
            StringBuffer sb = new StringBuffer();
            sb.append(TRACE_ROOT_DELETE);
            sb.append(" Dbid=").append(database.getId());
            logger.log(level, sb.toString());
        }
    }
}
TOP

Related Classes of com.sleepycat.je.recovery.RecoveryManager

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.