Package com.sleepycat.je.tree

Source Code of com.sleepycat.je.tree.Tree

/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 2002, 2011 Oracle and/or its affiliates.  All rights reserved.
*
*/

package com.sleepycat.je.tree;

import static com.sleepycat.je.dbi.BTreeStatDefinition.BTREE_RELATCHES_REQUIRED;
import static com.sleepycat.je.dbi.BTreeStatDefinition.BTREE_ROOT_SPLITS;
import static com.sleepycat.je.dbi.BTreeStatDefinition.GROUP_DESC;
import static com.sleepycat.je.dbi.BTreeStatDefinition.GROUP_NAME;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.ListIterator;
import java.util.logging.Level;
import java.util.logging.Logger;

import com.sleepycat.je.BtreeStats;
import com.sleepycat.je.CacheMode;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.EnvironmentFailureException;
import com.sleepycat.je.StatsConfig;
import com.sleepycat.je.cleaner.LocalUtilizationTracker;
import com.sleepycat.je.dbi.CursorImpl;
import com.sleepycat.je.dbi.DatabaseImpl;
import com.sleepycat.je.dbi.DbTree;
import com.sleepycat.je.dbi.EnvironmentImpl;
import com.sleepycat.je.dbi.INList;
import com.sleepycat.je.latch.LatchSupport;
import com.sleepycat.je.latch.SharedLatch;
import com.sleepycat.je.log.LogManager;
import com.sleepycat.je.log.Loggable;
import com.sleepycat.je.recovery.RecoveryManager;
import com.sleepycat.je.utilint.DbLsn;
import com.sleepycat.je.utilint.IntStat;
import com.sleepycat.je.utilint.LoggerUtils;
import com.sleepycat.je.utilint.LongStat;
import com.sleepycat.je.utilint.RelatchRequiredException;
import com.sleepycat.je.utilint.StatGroup;
import com.sleepycat.je.utilint.TestHook;
import com.sleepycat.je.utilint.TestHookExecute;

/**
* Tree implements the JE B+Tree.
*
* A note on tree search patterns:
* There's a set of Tree.search* methods. Some clients of the tree use
* those search methods directly, whereas other clients of the tree
* tend to use methods built on top of search.
*
* The semantics of search* are
*   they leave you pointing at a BIN or IN
*   they don't tell you where the reference of interest is.
* The semantics of the get* methods are:
*   they leave you pointing at a BIN or IN
*   they return the index of the slot of interest
*   they traverse down to whatever level is needed
*   they are built on top of search* methods.
* For the future:
* Over time, we need to clarify which methods are to be used by clients
* of the tree. Preferably clients that call the tree use get*, although
* there are cases where they need visibility into the tree structure.
*
* Also, search* should return the location of the slot to save us a
* second binary search.
*
* Search Method Call Hierarchy
* ----------------------------
* getFirst/LastNode
*  search
*  CALLED BY:
*   CursorImpl.getFirstOrLast
*
* getNext/PrevBin
*  getParentINForChildIN
*  searchSubTree
*  CALLED BY:
*   DupConvert
*   CursorImpl.getNext
*
* getParentINForChildIN
*  IN.findParent
*  does not use shared latching
*  CALLED BY:
*   Checkpointer.flushIN (doFetch=false, targetLevel=-1)
*   FileProcessor.processIN (doFetch=true, targetLevel=LEVEL)
*   Evictor.evictIN (doFetch=true, targetLevel=-1)
*   RecoveryManager.replaceOrInsertChild (doFetch=true, targetLevel=-1)
*   getNext/PrevBin (doFetch=true, targetLevel=-1)
*
* search
*  searchSubTree
*  CALLED BY:
*   CursorImpl.searchAndPosition
*   INCompressor to find BIN
* searchSubTree
*  uses shared grandparent latching
*
* getParentBINForChildLN
*  searchSplitsAllowed
*   CALLED BY:
*    RecoveryManager.redo
*    RecoveryManager.recoveryUndo
*  search
*   CALLED BY:
*    RecoveryManager.abortUndo
*    RecoveryManager.rollbackUndo
*    FileProcessor.processLN
*    Cleaner.processPendingLN
*    UtilizationProfile.verifyLsnIsObsolete (utility)
*
* findBinForInsert
*  searchSplitsAllowed
*  CALLED BY:
*   CursorImpl.putInternal
*
* searchSplitsAllowed
*  uses shared non-grandparent latching
*  CALLED BY:
*   DupConvert (instead of findBinForInsert, which needs a cursor)
*
* Possible Shared Latching Improvements
* -------------------------------------
* By implementing grandparent latching in searchSplitsAllowed we would
* increase performance slightly in these cases:
*  Insertions
*
* By implementing shared latching in getParentINForChildIN we would get better
* concurrency in these cases:
*  Cursor scan, when moving between BINs
*  Eviction
*  Checkpoints
*  Cleaning INs
* By implementing shared latching for BINs we would get better concurrency in
* these cases:
*  Reads when LN is in cache, or LN is not needed (key-only op, e.g., dups)
*/
public final class Tree implements Loggable {

    /* For debug tracing */
    private static final String TRACE_ROOT_SPLIT = "RootSplit:";

    private DatabaseImpl database;
    private ChildReference root;
    private int maxTreeEntriesPerNode;

    /* Stats */
    private StatGroup stats;

    /* The number of times the tree root has been split. */
    private IntStat rootSplits;
    /* The number of latch upgrades from shared to exclusive required. */
    private LongStat relatchesRequired;

    /*
     * Latch that must be held when using/accessing the root node.  Protects
     * against the root being changed out from underneath us by splitRoot.
     */
    private SharedLatch rootLatch;

    private ThreadLocal<TreeWalkerStatsAccumulator> treeStatsAccumulatorTL =
        new ThreadLocal<TreeWalkerStatsAccumulator>();

    /*
     * We don't need the stack trace on this so always throw a static and
     * avoid the cost of Throwable.fillInStackTrace() every time it's thrown.
     * [#13354].
     */
    private static SplitRequiredException splitRequiredException =
        new SplitRequiredException();

    /**
     * Embodies an enum for the type of search being performed.  NORMAL means
     * do a regular search down the tree.  LEFT/RIGHT means search down the
     * left/right side to find the first/last node in the tree.
     */
    public static class SearchType {
        /*
         * Search types.  The instances carry no state, so they can only be
         * told apart by reference identity.
         */
        public static final SearchType NORMAL = new SearchType();
        public static final SearchType LEFT   = new SearchType();
        public static final SearchType RIGHT  = new SearchType();

        /*
         * The private constructor keeps other classes from creating
         * additional search types.
         */
        private SearchType() {
        }
    }

    /* For unit tests */
    private TestHook waitHook; // used for generating race conditions
    private TestHook searchHook; // [#12736]
    private TestHook ckptHook; // [#13897]

    /**
     * Create a new tree.
     */
    public Tree(DatabaseImpl database) {
        /* Creates the root latch and the stat objects. */
        init(database);
        /* Records the database and reads its max entries per node. */
        setDatabase(database);
    }

    /**
     * Create a tree that's being read in from the log.
     */
    public Tree() {
        /* No database is known yet; setDatabase() can supply it later. */
        init(null);
        maxTreeEntriesPerNode = 0;
    }

    /**
     * constructor helper
     */
    private void init(DatabaseImpl database) {
        rootLatch = new SharedLatch("RootLatch");
        this.root = null;
        this.database = database;

        /* Do the stats definitions. */
        stats = new StatGroup(GROUP_NAME, GROUP_DESC);
        relatchesRequired = new LongStat(stats, BTREE_RELATCHES_REQUIRED);
        rootSplits = new IntStat(stats, BTREE_ROOT_SPLITS);
    }

    /**
     * Set the database for this tree. Used by recovery when recreating an
     * existing tree.
     */
    public void setDatabase(DatabaseImpl database) {
        this.database = database;

        /* Cache the per-node entry limit configured on the database. */
        maxTreeEntriesPerNode = database.getNodeMaxTreeEntries();
    }

    /**
     * @return the database for this Tree.
     */
    public DatabaseImpl getDatabase() {
        /* Plain field read; no latching is performed here. */
        return database;
    }

    /**
     * Set the root for the tree. Should only be called within the root latch.
     */
    public void setRoot(ChildReference newRoot, boolean notLatched) {
        /*
         * Unless the caller states that it is not latched, it is expected to
         * hold the root latch exclusively.  This is only checked when
         * assertions are enabled.
         */
        assert (notLatched || rootLatch.isWriteLockedByCurrentThread());
        root = newRoot;
    }

    /**
     * Creates a root reference for the given target node, key and LSN.
     *
     * @return a new RootChildReference, typed as ChildReference.
     */
    public ChildReference makeRootChildReference(Node target,
                                                 byte[] key,
                                                 long lsn) {
        /*
         * The RootChildReference subclass is used, rather than a plain
         * ChildReference, so that its root latch checks apply.
         */
        return new RootChildReference(target, key, lsn);
    }

    /**
     * Creates a root reference using the no-argument RootChildReference
     * constructor.
     */
    private ChildReference makeRootChildReference() {
        return new RootChildReference();
    }

    /*
     * A tree doesn't have a root if (a) the root field is null, or (b) the
     * root is non-null, but has neither a valid target nor a valid LSN. Case
     * (b) can happen if the database is or was previously opened in deferred
     * write mode.
     *
     * @return false if there is no real root.
     */
    public boolean rootExists() {
        if (root == null) {
            return false;
        }

        if ((root.getTarget() == null) &&
            (root.getLsn() == DbLsn.NULL_LSN)) {
            return false;
        }

        return true;
    }

    /**
     * Perform a fast check to see if the root IN is resident.  No latching is
     * performed.  To ensure that the root IN is not loaded by another thread,
     * this method should be called while holding a write lock on the MapLN.
     * That will prevent opening the DB in another thread, and potentially
     * loading the root IN. [#13415]
     */
    public boolean isRootResident() {
        return root != null && root.getTarget() != null;
    }

    /*
     * Class that overrides fetchTarget() so that if the rootLatch is not
     * held exclusively when the root is fetched, we upgrade it to exclusive.
     */
    private class RootChildReference extends ChildReference {

        private RootChildReference() {
            super();
        }

        private RootChildReference(Node target, byte[] key, long lsn) {
            super(target, key, lsn);
        }

        /* Caller is responsible for releasing rootLatch. */
        @Override
        public Node fetchTarget(DatabaseImpl database, IN in)
            throws DatabaseException {

            if (getTarget() == null &&
                !rootLatch.isWriteLockedByCurrentThread()) {
                rootLatch.release();
                rootLatch.acquireExclusive();
            }

            return super.fetchTarget(database, in);
        }

        @Override
        public void setTarget(Node target) {
            assert rootLatch.isWriteLockedByCurrentThread();
            super.setTarget(target);
        }

        @Override
        public void clearTarget() {
            assert rootLatch.isWriteLockedByCurrentThread();
            super.clearTarget();
        }

        @Override
        public void setLsn(long lsn) {
            assert rootLatch.isWriteLockedByCurrentThread();
            super.setLsn(lsn);
        }

        @Override
        void updateLsnAfterOptionalLog(DatabaseImpl dbImpl, long lsn) {
            assert rootLatch.isWriteLockedByCurrentThread();
            super.updateLsnAfterOptionalLog(dbImpl, lsn);
        }
    }

    /**
     * Get LSN of the rootIN. Obtained without latching, should only be
     * accessed while quiescent.
     */
    public long getRootLsn() {
        if (root == null) {
            return DbLsn.NULL_LSN;
        } else {
            return root.getLsn();
        }
    }

    /**
     * @return the number of root splits recorded in this tree's stats.
     */
    int getTreeStats() {
        /* Despite the method name, only the root split count is returned. */
        return rootSplits.get();
    }

    private TreeWalkerStatsAccumulator getTreeStatsAccumulator() {
        if (EnvironmentImpl.getThreadLocalReferenceCount() > 0) {
            return treeStatsAccumulatorTL.get();
        } else {
            return null;
        }
    }

    /**
     * Sets the stats accumulator for the calling thread.
     *
     * @param tSA the accumulator to store in this tree's thread local.
     */
    public void setTreeStatsAccumulator(TreeWalkerStatsAccumulator tSA) {
        /* Stored in a ThreadLocal, so the value is kept per thread. */
        treeStatsAccumulatorTL.set(tSA);
    }

    public IN withRootLatchedExclusive(WithRootLatched wrl)
        throws DatabaseException {

        try {
            rootLatch.acquireExclusive();
            return wrl.doWork(root);
        } finally {
            rootLatch.release();
        }
    }

    public IN withRootLatchedShared(WithRootLatched wrl)
        throws DatabaseException {

        try {
            rootLatch.acquireShared();
            return wrl.doWork(root);
        } finally {
            rootLatch.release();
        }
    }

    /**
     * Acquires the root latch exclusively.  The latch is still held when this
     * method returns; the caller must release it, e.g. with
     * releaseRootLatch().
     */
    public void latchRootLatchExclusive()
        throws DatabaseException {

        rootLatch.acquireExclusive();
    }

    /**
     * Releases the root latch via SharedLatch.releaseIfOwner.
     */
    public void releaseRootLatch()
        throws DatabaseException {

        /*
         * NOTE(review): releaseIfOwner is presumably a no-op when the current
         * thread does not hold the latch -- confirm against SharedLatch.
         */
        rootLatch.releaseIfOwner();
    }

    /**
     * Deletes a BIN specified by key from the tree. If the BIN resides in a
     * subtree that can be pruned away, prune as much as possible, so we
     * don't leave a branch that has no BINs.
     *
     * It's possible that the targeted BIN will now have entries, or will
     * have resident cursors. Either will prevent deletion.
     *
     * @param idKey - the identifier key of the node to delete.
     * @param localTracker is used for tracking obsolete node info.
     */
    public void delete(byte[] idKey,
                       LocalUtilizationTracker localTracker)
        throws DatabaseException,
               NodeNotEmptyException,
               CursorsExistException {

        /* Stays null unless a subtree is actually detached below. */
        IN subtreeRootIN = null;

        /*
         * A delete is a reverse split that must be propagated up to the root.
         * [#13501] Keep all nodes from the rootIN to the parent of the
         * deletable subtree latched as we descend so we can log the
         * IN deletion and cascade the logging up the tree. The latched
         * nodes are kept in order in the nodeLadder.
         */
        ArrayList<SplitInfo> nodeLadder = new ArrayList<SplitInfo>();

        IN rootIN = null;
        boolean rootNeedsUpdating = false;

        /*
         * The root latch is held exclusively for the whole descent and
         * detach, and is released in the finally clause below.
         */
        rootLatch.acquireExclusive();
        try {
            if (!rootExists()) {
                /* no action, tree is deleted or was never persisted. */
                return;
            }

            rootIN = (IN) root.fetchTarget(database, null);
            rootIN.latch(CacheMode.UNCHANGED);

            /* Fills nodeLadder with the path down to the deletable subtree. */
            searchDeletableSubTree(rootIN, idKey, nodeLadder);
            if (nodeLadder.size() == 0) {

                /*
                 * The tree is empty, so do nothing.  Root compression is no
                 * longer supported.  Root compression has no impact on memory
                 * usage now that we evict the root IN.  It reduces log space
                 * taken by INs for empty (but not removed) databases, yet
                 * requires logging a INDelete and MapLN; this provides very
                 * little benefit, if any.  Because it requires extensive
                 * testing (which has not been done), this minor benefit is not
                 * worth the cost.  And by removing it we no longer log
                 * INDelete, which reduces complexity going forward. [#17546]
                 */
            } else {
                /* Detach this subtree. */
                SplitInfo detachPoint =
                    nodeLadder.get(nodeLadder.size() - 1);
                boolean deleteOk =
                    detachPoint.parent.deleteEntry(detachPoint.index,
                                                   true);
                assert deleteOk;

                /* Cascade updates upward, including writing the root IN. */
                rootNeedsUpdating = cascadeUpdates(nodeLadder, -1);
                subtreeRootIN = detachPoint.child;
            }
        } finally {
            /*
             * Release bottom-up: the ladder's child nodes first, then the
             * root IN, and the root latch last.
             *
             * NOTE(review): if rootIN.latch() itself threw, rootIN would be
             * non-null here without being latched -- confirm that this
             * cannot happen or that releaseLatch() tolerates it.
             */
            releaseNodeLadderLatches(nodeLadder);

            if (rootIN != null) {
                rootIN.releaseLatch();
            }

            rootLatch.release();
        }

        /* From here on no tree latches are held by this method. */
        if (subtreeRootIN != null) {

            EnvironmentImpl envImpl = database.getDbEnvironment();
            if (rootNeedsUpdating) {

                /*
                 * modifyDbRoot will grab locks and we can't have the INList
                 * latches or root latch held while it tries to acquire locks.
                 */
                DbTree dbTree = envImpl.getDbTree();
                dbTree.optionalModifyDbRoot(database);
                RecoveryManager.traceRootDeletion
                    (envImpl.getLogger(), database);
            }

            /*
             * Count obsolete nodes after logging the delete. We can do
             * this without having the nodes of the subtree latched because the
             * subtree has been detached from the tree.
             */
            INList inList = envImpl.getInMemoryINs();
            accountForSubtreeRemoval(inList, subtreeRootIN, localTracker);
        }
    }

    private void releaseNodeLadderLatches(ArrayList<SplitInfo> nodeLadder)
        throws DatabaseException {

        /*
         * Clear any latches left in the node ladder. Release from the
         * bottom up.
         */
        ListIterator<SplitInfo> iter =
            nodeLadder.listIterator(nodeLadder.size());
        while (iter.hasPrevious()) {
            SplitInfo info = iter.previous();
            info.child.releaseLatch();
        }
    }

    /**
     * Update nodes for a delete, going upwards. For example, suppose a
     * node ladder holds:
     * INa, INb, index for INb in INa
     * INb, INc, index for INc in INb
     * INc, BINd, index for BINd in INc
     *
     * When we enter this method, BINd has already been removed from INc. We
     * need to
     *  - log INc
     *  - update INb, log INb
     *  - update INa, log INa
     *
     * @param nodeLadder List of SplitInfos describing each node pair on the
     * downward path
     * @param index currently unused by this method.
     * @return whether the DB root needs updating.
     */
    private boolean cascadeUpdates(ArrayList<SplitInfo> nodeLadder, int index)
        throws DatabaseException {

        ListIterator<SplitInfo> iter =
            nodeLadder.listIterator(nodeLadder.size());
        EnvironmentImpl envImpl = database.getDbEnvironment();
        LogManager logManager = envImpl.getLogManager();

        long newLsn = DbLsn.NULL_LSN;
        SplitInfo info = null;
        while (iter.hasPrevious()) {
            info = iter.previous();

            if (newLsn != DbLsn.NULL_LSN) {
                info.parent.updateEntry(info.index, newLsn);
            }
            newLsn = info.parent.optionalLog(logManager);
        }

        boolean rootNeedsUpdating = false;
        if (info != null) {
            /* We've logged the top of this subtree, record it properly. */
            assert info.parent.isDbRoot();
            /* We updated the rootIN of the database. */
            assert rootLatch.isWriteLockedByCurrentThread();
            root.updateLsnAfterOptionalLog(database, newLsn);
            rootNeedsUpdating = true;
        }
        return rootNeedsUpdating;
    }

    /**
     * Find the leftmost node (IN or BIN) in the tree.
     *
     * @return the leftmost node in the tree, null if the tree is empty.  The
     * returned node is latched and the caller must release it.
     */
    public IN getFirstNode(CacheMode cacheMode)
        throws DatabaseException {

        /*
         * No key is passed; SearchType.LEFT directs the search down the left
         * side of the tree.
         */
        return search(null, SearchType.LEFT, null, cacheMode,
                      null /*searchComparator*/);
    }

    /**
     * Find the rightmost node (IN or BIN) in the tree.
     *
     * @return the rightmost node in the tree, null if the tree is empty.  The
     * returned node is latched and the caller must release it.
     */
    public IN getLastNode(CacheMode cacheMode)
        throws DatabaseException {

        /*
         * No key is passed; SearchType.RIGHT directs the search down the
         * right side of the tree.
         */
        return search(null, SearchType.RIGHT, null, cacheMode,
                      null /*searchComparator*/);
    }

    /**
     * Variant of getParentINForChildIN with no target level and no tracking
     * list.
     */
    public SearchResult getParentINForChildIN(IN child,
                                              boolean requireExactMatch,
                                              CacheMode cacheMode)
        throws DatabaseException {

        /* Delegate with no target level (-1) and no tracking list (null). */
        return getParentINForChildIN
            (child, requireExactMatch, cacheMode, -1 /*targetLevel*/, null);
    }

    /**
     * Return a reference to the parent or possible parent of the child.  Used
     * by objects that need to take a standalone node and find it in the tree,
     * like the evictor, checkpointer, and recovery.
     *
     * @param child The child node for which to find the parent.  This node is
     * latched by the caller and is released by this function before returning
     * to the caller.
     *
     * @param requireExactMatch if true, we must find the exact parent, not a
     * potential parent.
     *
     * @param cacheMode The CacheMode for affecting the hotness of the tree.
     *
     * @param trackingList if not null, add the LSNs of the parents visited
     * along the way, as a debug tracing mechanism. This is meant to stay in
     * production, to add information to the log.
     *
     * @return a SearchResult object. If the parent has been found,
     * result.foundExactMatch is true. If any parent, exact or potential has
     * been found, result.parent refers to that node.
     */
    public SearchResult getParentINForChildIN(IN child,
                                              boolean requireExactMatch,
                                              CacheMode cacheMode,
                                              int targetLevel,
                                              List<TrackingInfo> trackingList)
        throws DatabaseException {

        /* Sanity checks */
        if (child == null) {
            throw EnvironmentFailureException.unexpectedState
                ("getParentNode passed null");
        }

        assert child.isLatchOwnerForWrite();

        /*
         * Get information from child before releasing latch.
         */
        byte[] treeKey = child.getIdentifierKey();
        boolean isRoot = child.isRoot();
        child.releaseLatch();

        return getParentINForChildIN(child.getNodeId(),
                                     isRoot,
                                     treeKey,
                                     requireExactMatch,
                                     cacheMode,
                                     targetLevel,
                                     trackingList,
                                     true);
    }

    /**
     * Return a reference to the parent or possible parent of the child.  Used
     * by objects that need to take a node ID and find it in the tree,
     * like the evictor, checkpointer, and recovery.
     *
     * @param requireExactMatch if true, we must find the exact parent, not a
     * potential parent.
     *
     * @param cacheMode The CacheMode for affecting the hotness of the tree.
     *
     * @param trackingList if not null, add the LSNs of the parents visited
     * along the way, as a debug tracing mechanism. This is meant to stay in
     * production, to add information to the log.
     *
     * @param doFetch if false, stop the search if we run into a non-resident
     * child. Used by the checkpointer to avoid conflicting with work done
     * by the evictor.
     *
     * @return a SearchResult object. If the parent has been found,
     * result.foundExactMatch is true. If any parent, exact or potential has
     * been found, result.parent refers to that node.
     */
    public SearchResult getParentINForChildIN(long targetNodeId,
                                              boolean targetIsRoot,
                                              byte[] targetTreeKey,
                                              boolean requireExactMatch,
                                              CacheMode cacheMode,
                                              int targetLevel,
                                              List<TrackingInfo> trackingList,
                                              boolean doFetch)
        throws DatabaseException {

        /*
         * Use exclusive latching. Since the caller will be logging the child
         * IN, the parent IN must be latched exclusively. [#18567]
         */
        IN rootIN = getRootINLatchedExclusive(cacheMode);

        /*
         * When rootIN is null the result is returned exactly as constructed.
         * NOTE(review): presumably that means "no parent found" -- confirm
         * against the defaults set by SearchResult's constructor.
         */
        SearchResult result = new SearchResult();
        if (rootIN != null) {
            /* The tracking list is a permanent tracing aid. */
            if (trackingList != null) {
                trackingList.add(new TrackingInfo(root.getLsn(),
                                                  rootIN.getNodeId(),
                                                  rootIN.getNEntries()));
            }

            IN potentialParent = rootIN;
            boolean success = false;

            try {
                /* Descend one level per iteration, starting at the root. */
                while (result.keepSearching) {

                    /*
                     * [12736] Prune away oldBin.  Assert has intentional
                     * side effect.
                     */
                    assert TestHookExecute.doHookIfSet(searchHook);

                    /*
                     * NOTE(review): findParent is expected to fill in result
                     * (parent, index, keepSearching) and to manage the latch
                     * hand-off from potentialParent to result.parent --
                     * confirm in IN.findParent.
                     */
                    potentialParent.findParent(SearchType.NORMAL,
                                               targetNodeId,
                                               targetIsRoot,
                                               targetTreeKey,
                                               result,
                                               requireExactMatch,
                                               cacheMode,
                                               targetLevel,
                                               doFetch);

                    /*
                     * Update tracking list: record the slot chosen in the
                     * node just searched and, if the search continues, start
                     * a new entry for the node about to be searched.
                     */
                    if (trackingList != null) {
                        trackingList.get(trackingList.size() - 1).
                            setIndex(result.index);
                        if (result.keepSearching) {
                            trackingList.add(new TrackingInfo
                                (potentialParent.getLsn(result.index),
                                 result.parent.getNodeId(),
                                 result.parent.getNEntries()));
                        }
                    }

                    /* Move to next potential parent. */
                    potentialParent = result.parent;
                }
                success = true;

            } catch (RelatchRequiredException e) {
                /* Should never happen because we use exclusive latches. */
                throw EnvironmentFailureException.unexpectedException(e);
            } finally {

                /*
                 * The only thing that can be latched at this point is
                 * potentialParent.
                 */
                if (!success) {
                    potentialParent.releaseLatch();
                }
            }
        }
        return result;
    }

    /**
     * Return a reference to the parent of this LN. This searches through the
     * tree and allows splits. Set the tree location to the proper BIN parent
     * whether or not the LN child is found. That's because if the LN is not
     * found, recovery or abort will need to place it within the tree, and so
     * we must point at the appropriate position.
     *
     * <p>When this method returns with location.bin non-null, the BIN is
     * latched and must be unlatched by the caller.  Note that location.bin may
     * be non-null even if this method returns false.</p>
     *
     * @param location a holder class to hold state about the location
     * of our search. Sort of an internal cursor.
     *
     * @param key key to navigate through main key
     *
     * @param splitsAllowed true if this method is allowed to cause tree splits
     * as a side effect. In practice, recovery can cause splits, but abort
     * can't.
     *
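     * @param findDeletedEntries if true, knownDeleted entries are considered
     * when looking up the key in the BIN; if false, they are not.
     *
     * @param cacheMode The CacheMode for affecting the hotness of the tree.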
     *
     * @return true if node found in tree.
     * If false is returned and there is the possibility that we can insert
     * the record into a plausible parent we must also set
     * - location.bin (may be null if no possible parent found)
     * - location.lnKey (don't need to set if no possible parent).
     */
    public boolean getParentBINForChildLN(TreeLocation location,
                                          byte[] key,
                                          boolean splitsAllowed,
                                          boolean findDeletedEntries,
                                          CacheMode cacheMode)
        throws DatabaseException {

        /*
         * Find the BIN that either points to this LN or could be its
         * ancestor.
         */
        IN searchResult = null;
        if (splitsAllowed) {
            searchResult = searchSplitsAllowed(key, cacheMode,
                                               null /*searchComparator*/);
        } else {
            searchResult = search(key, SearchType.NORMAL, null, cacheMode,
                                  null /*searchComparator*/);
        }
        location.bin = (BIN) searchResult;

        if (location.bin == null) {
            return false;
        }

        /*
         * If caller wants us to consider knownDeleted entries then do an
         * inexact search in findEntry since that will find knownDeleted
         * entries.  If caller doesn't want us to consider knownDeleted entries
         * then do an exact search in findEntry since that will not return
         * knownDeleted entries.
         */
        boolean exactSearch = false;
        boolean indicateIfExact = true;
        if (!findDeletedEntries) {
            exactSearch = true;
            indicateIfExact = false;
        }
        location.index =
            location.bin.findEntry(key, indicateIfExact, exactSearch);

        boolean match = false;
        if (findDeletedEntries) {
            match = (location.index >= 0 &&
                     (location.index & IN.EXACT_MATCH) != 0);
            location.index &= ~IN.EXACT_MATCH;
        } else {
            match = (location.index >= 0);
        }

        if (match) {
            location.childLsn = location.bin.getLsn(location.index);
            return true;
        } else {
            location.lnKey = key;
            return false;
        }
    }

    /**
     * Return a reference to the adjacent BIN.
     *
     * @param bin The BIN to find the next BIN for.  This BIN is latched.
     *
     * @return The next BIN, or null if there are no more.  The returned node
     * is latched and the caller must release it.  If null is returned, the
     * argument BIN remains latched.
     */
    public BIN getNextBin(BIN bin,
                          CacheMode cacheMode)
        throws DatabaseException {

        /* true selects the forward direction in the shared helper. */
        return getNextBinInternal(bin, true, cacheMode);
    }

    /**
     * Return a reference to the previous BIN.
     *
     * @param bin The BIN to find the previous BIN for.  This BIN is latched.
     *
     * @return The previous BIN, or null if there are no more.  The returned
     * node is latched and the caller must release it.  If null is returned,
     * the argument bin remains latched.
     */
    public BIN getPrevBin(BIN bin,
                          CacheMode cacheMode)
        throws DatabaseException {

        /* false selects the backward direction in the shared helper. */
        return getNextBinInternal(bin, false, cacheMode);
    }

    /**
     * Helper routine for getNextBin and getPrevBin to iterate through BINs.
     * <p>
     * Starting from the given BIN, the method climbs the tree one level at a
     * time (via getParentINForChildIN) until it finds an ancestor that has a
     * slot to the right (forward) or left (backward) of the path it came up
     * on.  It then descends from that sibling slot down the left-most
     * (forward) or right-most (backward) edge to a BIN and returns it.
     *
     * @param bin the starting BIN.  The entry assertion expects exactly one
     * latch to be held by this thread, presumably the latch on this BIN.
     *
     * @param forward true to find the following BIN, false to find the
     * preceding one.
     *
     * @param cacheMode the cache mode used for every latch taken here and
     * passed to the helper searches.
     *
     * @return the adjacent BIN, latched, or null when the ascent runs past
     * the top of the tree.  On the null path the method asserts that no
     * latches are held.
     */
    private BIN getNextBinInternal(BIN bin,
                                   boolean forward,
                                   CacheMode cacheMode)
        throws DatabaseException {

        /*
         * Use the right most key (for a forward progressing cursor) or the
         * left most key (for a backward progressing cursor) as the idkey.  The
         * reason is that the BIN may get split while finding the next BIN so
         * it's not safe to take the BIN's identifierKey entry.  If the BIN
         * gets split, then the right (left) most key will still be on the
         * resultant node.  The exception to this is that if there are no
         * entries, we just use the identifier key.
         */
        byte[] idKey = null;

        if (bin.getNEntries() == 0) {
            idKey = bin.getIdentifierKey();
        } else if (forward) {
            idKey = bin.getKey(bin.getNEntries() - 1);
        } else {
            idKey = bin.getKey(0);
        }

        /*
         * 'next' is the node whose parent we look up on each iteration.
         * nextIsLatched only drives the cleanup in the catch block below.
         */
        IN next = bin;
        boolean nextIsLatched = false;

        assert LatchSupport.countLatchesHeld() == 1:
            LatchSupport.latchesHeldToString();

        /*
         * Ascend the tree until we find a level that still has nodes to the
         * right (or left if !forward) of the path that we're on.  If we reach
         * the root level, we're done.
         */
        IN parent = null;
        IN nextIN = null;
        boolean nextINIsLatched = false;
        try {
            while (true) {

                /*
                 * Move up a level from where we are now and check to see if we
                 * reached the top of the tree.  nextIsLatched is cleared
                 * before the call; the assertions that follow (zero latches
                 * when no parent is found, one latch when it is) indicate
                 * that getParentINForChildIN takes over the latch on 'next'.
                 */
                SearchResult result = null;
                nextIsLatched = false;
                result = getParentINForChildIN
                    (next, true /*requireExactMatch*/, cacheMode);
                if (result.exactParentFound) {
                    parent = result.parent;
                } else {
                    /* We've reached the root of the tree. */
                    assert (LatchSupport.countLatchesHeld() == 0):
                        LatchSupport.latchesHeldToString();
                    return null;
                }

                /* Only one latch is held now, presumably the parent's. */
                assert (LatchSupport.countLatchesHeld() == 1) :
                    LatchSupport.latchesHeldToString();

                /*
                 * Figure out which entry we are in the parent.  Add (subtract)
                 * 1 to move to the next (previous) one and check that we're
                 * still pointing to a valid child.  Don't just use the result
                 * of the parent.findEntry call in getParentNode, because we
                 * want to use our explicitly chosen idKey.
                 *
                 * Despite its name, moreEntriesThisBin refers to slots in
                 * 'parent', which is an upper IN rather than a BIN.
                 */
                int index = parent.findEntry(idKey, false, false);
                boolean moreEntriesThisBin = false;
                if (forward) {
                    index++;
                    if (index < parent.getNEntries()) {
                        moreEntriesThisBin = true;
                    }
                } else {
                    if (index > 0) {
                        moreEntriesThisBin = true;
                    }
                    index--;
                }

                if (moreEntriesThisBin) {

                    /*
                     * There are more entries to the right (or left if
                     * !forward) of the current path in parent.  Get the
                     * entry, and then descend down the left most (or right
                     * most if !forward) path to a BIN.
                     */
                    nextIN = (IN) parent.fetchTargetWithExclusiveLatch(index);
                    nextIN.latch(cacheMode);
                    nextINIsLatched = true;

                    /* Both parent and nextIN are latched at this point. */
                    assert (LatchSupport.countLatchesHeld() == 2):
                        LatchSupport.latchesHeldToString();

                    if (nextIN.isBIN()) {
                        /* We landed at a leaf (i.e. a BIN). */
                        parent.releaseLatch();
                        parent = null; // to avoid falsely unlatching parent
                        TreeWalkerStatsAccumulator treeStatsAccumulator =
                            getTreeStatsAccumulator();
                        if (treeStatsAccumulator != null) {
                            nextIN.accumulateStats(treeStatsAccumulator);
                        }

                        return (BIN) nextIN;
                    } else {

                        /*
                         * We landed at an IN.  Descend down to the appropriate
                         * leaf (i.e. BIN) node.  searchSubTree is documented
                         * to unlatch its starting node and return the result
                         * latched, which is why nextINIsLatched is cleared
                         * once it returns.
                         *
                         * NOTE(review): if searchSubTree throws,
                         * nextINIsLatched is still true when the catch block
                         * runs, while searchSubTreeInternal's own cleanup
                         * appears to release its nodes already -- confirm
                         * nextIN cannot be released twice.
                         */
                        IN ret = searchSubTree(nextIN, null,
                                               (forward ?
                                                SearchType.LEFT :
                                                SearchType.RIGHT),
                                               null,
                                               cacheMode,
                                               null /*searchComparator*/);
                        nextINIsLatched = false;
                        parent.releaseLatch();
                        parent = null; // to avoid falsely unlatching parent

                        /* Only the returned node should remain latched. */
                        assert LatchSupport.countLatchesHeld() == 1:
                            LatchSupport.latchesHeldToString();

                        if (ret.isBIN()) {
                            return (BIN) ret;
                        } else {
                            throw EnvironmentFailureException.unexpectedState
                                ("subtree did not have a BIN for leaf");
                        }
                    }
                }

                /*
                 * Nothing at this level.  Ascend to a higher level: the
                 * still-latched parent becomes the node whose parent is
                 * looked up on the next iteration.
                 */
                next = parent;
                nextIsLatched = true;
                parent = null; // to avoid falsely unlatching parent below
            }
        } catch (DatabaseException e) {

            /*
             * Release whatever the flags say is still latched, then rethrow
             * the original exception.
             */
            if (next != null &&
                nextIsLatched) {
                next.releaseLatch();
            }

            if (parent != null) {
                parent.releaseLatch();
            }

            if (nextIN != null &&
                nextINIsLatched) {
                nextIN.releaseLatch();
            }

            throw e;
        }
    }

    /**
     * Split the root of the tree.
     */
    private void splitRoot(CacheMode cacheMode)
        throws DatabaseException {

        /*
         * Create a new root IN, insert the current root IN into it, and then
         * call split.
         */
        EnvironmentImpl env = database.getDbEnvironment();
        LogManager logManager = env.getLogManager();
        INList inMemoryINs = env.getInMemoryINs();

        IN curRoot = null;
        curRoot = (IN) root.fetchTarget(database, null);
        curRoot.latch(cacheMode);
        long curRootLsn = 0;
        long logLsn = 0;
        IN newRoot = null;
        try {

            /*
             * Make a new root IN, giving it an id key from the previous root.
             */
            byte[] rootIdKey = curRoot.getKey(0);
            newRoot = new IN(database, rootIdKey, maxTreeEntriesPerNode,
                             curRoot.getLevel() + 1);
            newRoot.latch(cacheMode);
            newRoot.setIsRoot(true);
            curRoot.setIsRoot(false);

            /*
             * Make the new root IN point to the old root IN. Log the old root
             * provisionally, because we modified it so it's not the root
             * anymore, then log the new root. We are guaranteed to be able to
             * insert entries, since we just made this root.
             */
            try {
                curRootLsn =
                    curRoot.optionalLogProvisional(logManager, newRoot);
                boolean insertOk = newRoot.insertEntry
                    (new ChildReference(curRoot, rootIdKey, curRootLsn));
                assert insertOk;

                logLsn = newRoot.optionalLog(logManager);
            } catch (DatabaseException e) {
                /* Something went wrong when we tried to log. */
                curRoot.setIsRoot(true);
                throw e;
            }
            inMemoryINs.add(newRoot);

            /*
             * Make the tree's root reference point to this new node. Now the
             * MapLN is logically dirty, but the change hasn't been logged.  Be
             * sure to flush the MapLN if we ever evict the root.
             */
            root.setTarget(newRoot);
            root.updateLsnAfterOptionalLog(database, logLsn);
            curRoot.split(newRoot, 0, maxTreeEntriesPerNode, cacheMode);
            root.setLsn(newRoot.getLastLoggedVersion());

        } finally {
            /* FindBugs ignore possible null pointer dereference of newRoot. */
            newRoot.releaseLatch();
            curRoot.releaseLatch();
        }
        rootSplits.increment();
        traceSplitRoot(Level.FINE, TRACE_ROOT_SPLIT, newRoot, logLsn,
                       curRoot, curRootLsn);
    }

    /**
     * Search the tree, starting at the root.  Depending on search type either
     * search using key, or search all the way down the right or left sides.
     * Stop the search either when the bottom of the tree is reached, or a node
     * matching nid is found (see below) in which case that node's parent is
     * returned.
     *
     * Preemptive splitting is not done during the search.
     *
     * @param key - the key to search for, or null if searchType is LEFT or
     * RIGHT.
     *
     * @param searchType - The type of tree search to perform.  NORMAL means
     * we're searching for key in the tree.  LEFT/RIGHT means we're descending
     * down the left or right side, resp.  DELETE means we're descending the
     * tree and will return the lowest node in the path that has > 1 entries.
     *
     * @param binBoundary - If non-null, information is returned about whether
     * the BIN found is the first or last BIN in the database.
     *
     * @return - the Node that matches the criteria, if any.  This is the node
     * that is farthest down the tree with a match.  Returns null if the root
     * is null.  Node is latched (unless it's null) and must be unlatched by
     * the caller.  Only IN's and BIN's are returned, not LN's.  In a NORMAL
     * search, It is the caller's responsibility to do the findEntry() call on
     * the key and BIN to locate the entry that matches key.  The return value
     * node is latched upon return and it is the caller's responsibility to
     * unlatch it.
     */
    public IN search(byte[] key,
                     SearchType searchType,
                     BINBoundary binBoundary,
                     CacheMode cacheMode,
                     Comparator<byte[]> searchComparator) {

        IN rootIN = getRootIN(cacheMode);

        if (rootIN != null) {
            return searchSubTree(rootIN, key, searchType, binBoundary,
                                 cacheMode, searchComparator);
        } else {
            return null;
        }
    }

    /**
     * Do a key based search, permitting pre-emptive splits. Returns the
     * target node's parent.
     */
    public IN searchSplitsAllowed(byte[] key,
                                  CacheMode cacheMode,
                                  Comparator<byte[]> searchComparator) {
        IN insertTarget = null;
        while (insertTarget == null) {
            rootLatch.acquireShared();
            boolean rootLatched = true;
            boolean rootLatchedExclusive = false;
            boolean rootINLatched = false;
            boolean success = false;
            IN rootIN = null;
            try {
                while (true) {
                    if (rootExists()) {
                        rootIN = (IN) root.fetchTarget(database, null);

                        /* Check if root needs splitting. */
                        if (rootIN.needsSplitting()) {
                            if (!rootLatchedExclusive) {
                                rootIN = null;
                                rootLatch.release();
                                rootLatch.acquireExclusive();
                                rootLatchedExclusive = true;
                                continue;
                            }
                            splitRoot(cacheMode);

                            /*
                             * We can't hold any latches while we lock.  If the
                             * root splits again between latch release and
                             * DbTree.db lock, no problem.  The latest root
                             * will still get written out.
                             */
                            rootLatch.release();
                            rootLatched = false;
                            EnvironmentImpl env = database.getDbEnvironment();
                            env.getDbTree().optionalModifyDbRoot(database);
                            rootLatched = true;
                            rootLatch.acquireExclusive();
                            rootIN = (IN) root.fetchTarget(database, null);
                            rootIN.latch(cacheMode);
                        } else {
                            rootIN.latchShared(cacheMode);
                        }
                        rootINLatched = true;
                    }
                    break;
                }
                success = true;
            } finally {
                if (!success && rootINLatched) {
                    rootIN.releaseLatch();
                }
                if (rootLatched) {
                    rootLatch.release();
                }
            }

            /* Don't loop forever if the root is null. [#13897] */
            if (rootIN == null) {
                break;
            }

            try {
                assert rootINLatched;
                while (true) {
                    try {
                        insertTarget = searchSubTreeSplitsAllowed
                            (rootIN, key, cacheMode, searchComparator);
                        break;
                    } catch (RelatchRequiredException RRE) {
                        relatchesRequired.increment();
                        database.getDbEnvironment().incRelatchesRequired();
                        rootLatch.acquireExclusive();
                        rootIN = (IN) root.fetchTarget(database, null);
                        rootIN.latch(cacheMode);
                        rootLatch.release();
                        continue;
                    }
                }
            } catch (SplitRequiredException e) {

                /*
                 * The last slot in the root was used at the point when this
                 * thread released the rootIN latch in order to force splits.
                 * Retry. SR [#11147].
                 */
                continue;
            }
        }

        return insertTarget;
    }

    public void loadStats(StatsConfig config, BtreeStats btreeStats) {
        /* Add the tree statistics to BtreeStats. */
        btreeStats.setTreeStats(stats.cloneGroup(false));

        if (config.getClear()) {
            relatchesRequired.clear();
        }
    }

    /**
     * Wrapper for searchSubTreeInternal that does a restart if a
     * RelatchRequiredException is thrown (i.e. a relatch of the root is
     * needed).
     */
    private IN searchSubTree(IN parent,
                             byte[] key,
                             SearchType searchType,
                             BINBoundary binBoundary,
                             CacheMode cacheMode,
                             Comparator<byte[]> searchComparator) {

        /*
         * If a an intermediate IN (e.g., from getNextBinInternal) was
         * originally passed, it was latched exclusively.
         */
        assert parent == null ||
               parent.isRoot() ||
               parent.isLatchOwnerForWrite();

        /*
         * Max of two iterations required.  First is root latched shared, and
         * second is root latched exclusive.
         */
        for (int i = 0; i < 2; i++) {
            try {
                return searchSubTreeInternal
                    (parent, key, searchType, binBoundary, cacheMode,
                     searchComparator);
            } catch (RelatchRequiredException RRE) {

                /*
                 * The original parent param was the DB root IN if this
                 * exception occurs, so latch it exclusively and retry.  If an
                 * intermediate IN was originally passed, it was latched
                 * exclusively and this can't happen.  See assertion at top of
                 * method.
                 */
                parent = getRootINLatchedExclusive(cacheMode);
            }
        }

        throw EnvironmentFailureException.unexpectedState
            ("searchSubTreeInternal should have completed in two tries");
    }

    /**
     * Searches a portion of the tree starting at parent using key.  If
     * searchType is NORMAL, then key must be supplied to guide the search.  If
     * searchType is LEFT (or RIGHT), then the tree is searched down the left
     * (or right) side to find the first (or last) leaf, respectively.
     * <p>
     * Enters with parent latched, assuming it's not null.  Exits with the
     * return value latched, assuming it's not null.
     * <p>
     * Latching mode is decided once, on entry: if the starting node is not
     * latched for write, children are latched shared and the node one level
     * up is kept latched as "grandParent" until the next child has been
     * fetched; otherwise children are latched exclusively and each parent is
     * released as soon as its child is latched.
     *
     * @param parent - the root of the subtree to start the search at.  This
     * node should be latched by the caller and will be unlatched prior to
     * return, unless it is itself the node returned.
     *
     * @param key - the key to search for, unless searchType is LEFT or RIGHT,
     * in which case it must be null.
     *
     * @param searchType - NORMAL means search using key
     *                     LEFT means find the first (leftmost) leaf
     *                     RIGHT means find the last (rightmost) leaf
     *
     * @param binBoundary - if non-null, its isFirstBin/isLastBin flags are
     * set to true on entry and cleared whenever the descent takes a slot that
     * is not the first/last slot of a node.
     *
     * @param cacheMode - the cache mode used for every latch taken here.
     *
     * @param searchComparator - passed to findEntry in a NORMAL search.
     *
     * @return - the node matching the argument criteria, or null if parent is
     * null.  The node is latched and must be unlatched by the caller.  The
     * parent argument and any other nodes that are latched during the search
     * are unlatched prior to return.
     *
     * @throws RelatchRequiredException if the root node (parent) must be
     * relatched exclusively because a null target was encountered (i.e. a
     * fetch must be performed on parent's child and the parent is latched
     * shared).
     */
    private IN searchSubTreeInternal(IN parent,
                                     byte[] key,
                                     SearchType searchType,
                                     BINBoundary binBoundary,
                                     CacheMode cacheMode,
                                     Comparator<byte[]> searchComparator)
        throws RelatchRequiredException {

        /* Return null if we're passed a null arg. */
        if (parent == null) {
            return null;
        }

        if ((searchType == SearchType.LEFT ||
             searchType == SearchType.RIGHT) &&
            key != null) {

            /*
             * If caller is asking for a right or left search, they shouldn't
             * be passing us a key.
             */
            throw EnvironmentFailureException.unexpectedState
                ("searchSubTree passed key and left/right search");
        }

        assert parent.isLatchOwnerForRead();

        /* Assume first and last until the descent proves otherwise. */
        if (binBoundary != null) {
            binBoundary.isLastBin = true;
            binBoundary.isFirstBin = true;
        }

        int index;
        IN child = null;
        IN grandParent = null;
        boolean childIsLatched = false;
        boolean grandParentIsLatched = false;

        /*
         * True when the starting node is not latched for write.  The value is
         * computed once and not re-evaluated, even if a lower node is later
         * relatched exclusively.
         */
        boolean maintainGrandParentLatches = !parent.isLatchOwnerForWrite();

        TreeWalkerStatsAccumulator treeStatsAccumulator =
            getTreeStatsAccumulator();

        /* Set just before each normal return; drives cleanup in finally. */
        boolean success = false;
        try {
            do {
                if (treeStatsAccumulator != null) {
                    parent.accumulateStats(treeStatsAccumulator);
                }

                if (parent.getNEntries() == 0) {
                    /* No more children, can't descend anymore. */
                    success = true;
                    return parent;
                } else if (searchType == SearchType.NORMAL) {
                    /* Look for the entry matching key in the current node. */
                    index = parent.findEntry(key, false, false,
                                             searchComparator);
                } else if (searchType == SearchType.LEFT) {
                    /* Left search, always take the 0th entry. */
                    index = 0;
                } else if (searchType == SearchType.RIGHT) {
                    /* Right search, always take the highest entry. */
                    index = parent.getNEntries() - 1;
                } else {
                    throw EnvironmentFailureException.unexpectedState
                        ("Invalid value of searchType: " + searchType);
                }

                assert index >= 0;

                /*
                 * Taking any slot other than the last (first) one at any
                 * level means the BIN we end up at is not the last (first).
                 */
                if (binBoundary != null) {
                    if (index != parent.getNEntries() - 1) {
                        binBoundary.isLastBin = false;
                    }
                    if (index != 0) {
                        binBoundary.isFirstBin = false;
                    }
                }

                /*
                 * Get the child node.  If target is null, and we don't have
                 * parent latched exclusively, then we need to relatch this
                 * parent so that we can fill in the target.  Fetching a target
                 * is a write to a node so it must be exclusively latched.
                 * Once we have the parent relatched exclusively, then we can
                 * release the grand parent.
                 */
                if (maintainGrandParentLatches &&
                    parent.getTarget(index) == null &&
                    !parent.isAlwaysLatchedExclusively()) {

                    if (grandParent == null) {

                        /*
                         * grandParent is null which implies parent is the root
                         * so throw RelatchRequiredException.  The finally
                         * block releases parent before the exception leaves.
                         */
                        throw
                            RelatchRequiredException.relatchRequiredException;
                    } else {
                        /* Release parent shared and relatch exclusive. */
                        parent.releaseLatch();
                        parent.latch(cacheMode);
                    }

                    /*
                     * Once parent has been re-latched exclusive we can release
                     * grandParent now (sooner), rather than after the
                     * fetchTarget (later).
                     */
                    if (grandParent != null) {
                        grandParent.releaseLatch();
                        grandParentIsLatched = false;
                        grandParent = null;
                    }
                }
                child = (IN) parent.fetchTarget(index);

                /*
                 * We know we're done with grandParent for sure, so release
                 * now.
                 */
                if (grandParent != null) {
                    grandParent.releaseLatch();
                    grandParentIsLatched = false;
                }

                /* See if we're even using shared latches. */
                if (maintainGrandParentLatches) {
                    /* Note that BINs are always latched exclusive. */
                    child.latchShared(cacheMode);
                } else {
                    child.latch(cacheMode);
                }

                /*
                 * NOTE(review): childIsLatched is never reset to false, so on
                 * later iterations it is already true while the new child is
                 * being fetched and latched -- confirm the cleanup in the
                 * finally block is still correct if latching the new child
                 * fails.
                 */
                childIsLatched = true;

                if (treeStatsAccumulator != null) {
                    child.accumulateStats(treeStatsAccumulator);
                }

                /*
                 * Continue down a level.  With shared latching the current
                 * parent stays latched and becomes the grandParent; with
                 * exclusive latching it is released right away.
                 */
                if (maintainGrandParentLatches) {
                    grandParent = parent;
                    grandParentIsLatched = true;
                } else {
                    parent.releaseLatch();
                }
                parent = child;
            } while (!parent.isBIN());

            /* Reached a BIN; child and parent refer to the same node here. */
            success = true;
            return child;
        } finally {
            if (!success) {

                /*
                 * In [#14903] we encountered a latch exception below and the
                 * original exception was lost.  Print the stack trace and
                 * allow the original exception to be thrown if this happens
                 * again, to get more information about the problem.
                 */
                try {
                    if (child != null &&
                        childIsLatched) {
                        child.releaseLatch();
                    }

                    /* Avoid releasing the same node twice. */
                    if (parent != child) {
                        parent.releaseLatch();
                    }
                } catch (Exception t2) {
                    t2.printStackTrace();
                }
            }

            /*
             * Runs on success and failure alike: a grandParent that is still
             * latched is always released before leaving.
             */
            if (grandParent != null &&
                grandParentIsLatched) {
                grandParent.releaseLatch();
                grandParentIsLatched = false;
            }
        }
    }

    /**
     * Search down the tree using a key, but instead of returning the BIN that
     * houses that key, find the point where we can detach a deletable
     * subtree. A deletable subtree is a branch where each IN has one child,
     * and the bottom BIN has no entries and no resident cursors. That point
     * can be found by saving a pointer to the lowest node in the path with
     * more than one entry.
     *
     *              INa
     *             /   \
     *          INb    INc
     *          |       |
     *         INd     ..
     *         / \
     *      INe  ..
     *       |
     *     BINx (suspected of being empty)
     *
     * In this case, we'd like to prune off the subtree headed by INe. INd
     * is the parent of this deletable subtree. As we descend, we must keep
     * latches for all the nodes that will be logged. In this case, we
     * will need to keep INa, INb and INd latched when we return from this
     * method.
     *
     * The method returns a list of parent/child/index structures. In this
     * example, the list will hold:
     *  INa/INb/index
     *  INb/INd/index
     *  INd/INe/index
     * Every node is latched, and every node except for the bottom most child
     * (INe) must be logged.
     */
    public void searchDeletableSubTree(IN parent,
                                       byte[] key,
                                       ArrayList<SplitInfo> nodeLadder)
        throws DatabaseException,
               NodeNotEmptyException,
               CursorsExistException {

        assert (parent!=null);
        assert (key!= null);
        assert parent.isLatchOwnerForWrite();

        int index;
        IN child = null;

        /* Save the lowest IN in the path that has multiple entries. */
        IN lowestMultipleEntryIN = null;

        do {
            if (parent.getNEntries() == 0) {
                break;
            }

            /* Remember if this is the lowest multiple point. */
            if (parent.getNEntries() > 1) {
                lowestMultipleEntryIN = parent;
            }

            index = parent.findEntry(key, false, false);
            assert index >= 0;

            /* Get the child node that matches. */
            child = (IN) parent.fetchTargetWithExclusiveLatch(index);
            child.latch(CacheMode.UNCHANGED);
            nodeLadder.add(new SplitInfo(parent, child, index));

            /* Continue down a level */
            parent = child;
        } while (!parent.isBIN());

        /*
         * See if there is a reason we can't delete this BIN -- i.e.
         * new items have been inserted, or a cursor exists on it.
         */
        if ((child != null) && child.isBIN()) {
            if (child.getNEntries() != 0) {
                throw NodeNotEmptyException.NODE_NOT_EMPTY;
            }

            /*
             * This case can happen if we are keeping a BIN on an empty
             * cursor as we traverse.
             */
            if (((BIN) child).nCursors() > 0) {
                throw CursorsExistException.CURSORS_EXIST;
            }
        }

        if (lowestMultipleEntryIN != null) {

            /*
             * Release all nodes up to the pair that holds the detach
             * point. We won't be needing those nodes, since they'll be
             * pruned and won't need to be updated.
             */
            ListIterator<SplitInfo> iter =
                nodeLadder.listIterator(nodeLadder.size());
            while (iter.hasPrevious()) {
                SplitInfo info = iter.previous();
                if (info.parent == lowestMultipleEntryIN) {
                    break;
                } else {
                    info.child.releaseLatch();
                    iter.remove();
                }
            }
        } else {

            /*
             * We actually have to prune off the entire tree. Release
             * all latches, and clear the node ladder.
             */
            releaseNodeLadderLatches(nodeLadder);
            nodeLadder.clear();
        }
    }

    /**
     * Search the portion of the tree starting at the parent, permitting
     * preemptive splits.
     *
     * When this returns, parent will be unlatched unless parent is the
     * returned IN.
     */
    private IN searchSubTreeSplitsAllowed(IN parent,
                                          byte[] key,
                                          CacheMode cacheMode,
                                          Comparator<byte[]> searchComparator)
        throws RelatchRequiredException,
               SplitRequiredException {

        if (parent != null) {

            /*
             * Search downward until we hit a node that needs a split. In that
             * case, retreat to the top of the tree and force splits downward.
             */
            while (true) {
                try {
                    return searchSubTreeUntilSplit(parent, key, cacheMode,
                                                   searchComparator);
                } catch (SplitRequiredException e) {
                    /* SR [#11144]*/
                    assert TestHookExecute.doHookIfSet(waitHook);

                    /*
                     * ForceSplit may itself throw SplitRequiredException if it
                     * finds that the parent doesn't have room to hold an extra
                     * entry. Allow the exception to propagate up to a place
                     * where it's safe to split the parent. We do this rather
                     * than
                     */
                    parent = forceSplit(parent, key, cacheMode);
                }
            }
        } else {
            return null;
        }
    }

    /**
     * Search the subtree, but throw an exception when we see a node
     * that has to be split.
     * <p>
     * The descent is guided by key.  Each child is latched before its parent
     * is released.  Children are latched exclusively if the starting node was
     * latched for write on entry, and shared otherwise.
     *
     * When this returns, parent will be unlatched unless parent is the
     * returned IN.
     *
     * @param parent the latched node to start from; must not be null, since
     * it is dereferenced immediately.
     *
     * @param key the key that guides the descent.
     *
     * @param cacheMode the cache mode used for every latch taken here.
     *
     * @param searchComparator passed to findEntry for key lookups.
     *
     * @return the BIN reached, or the first node on the path that has no
     * entries.  The returned node is latched.
     *
     * @throws SplitRequiredException if a child on the path still needs
     * splitting after a lazy compression attempt.  The child and its parent
     * are unlatched before the exception leaves this method.
     *
     * @throws RelatchRequiredException declared but not thrown directly by
     * this body; presumably raised by one of the calls made here -- TODO
     * confirm.
     */
    private IN searchSubTreeUntilSplit(IN parent,
                                       byte[] key,
                                       CacheMode cacheMode,
                                       Comparator<byte[]> searchComparator)
        throws RelatchRequiredException,
               SplitRequiredException {

        /* Decided once, from how the starting node is latched. */
        boolean latchingIsExclusive = parent.isLatchOwnerForWrite();

        int index;
        IN child = null;
        boolean childIsLatched = false;

        /* Set just before each normal return; drives cleanup in finally. */
        boolean success = false;

        try {
            do {
                if (parent.getNEntries() == 0) {
                    /* No more children, can't descend anymore. */
                    success = true;
                    return parent;
                } else {
                    /* Look for the entry matching key in the current node. */
                    index = parent.findEntry(key, false, false,
                                             searchComparator);
                }

                assert index >= 0;

                /* Get the child node that matches. */
                child = (IN) parent.fetchTarget(index);
                if (latchingIsExclusive) {
                    child.latch(cacheMode);
                } else {
                    /* Note that BINs are always latched exclusive. */
                    child.latchShared(cacheMode);
                }

                /*
                 * NOTE(review): childIsLatched is never reset to false, so on
                 * later iterations it is already true while the new child is
                 * being fetched and latched -- confirm the cleanup in the
                 * finally block is still correct if latching the new child
                 * fails.
                 */
                childIsLatched = true;

                /* Throw if we need to split. */
                if (child.needsSplitting()) {
                    /* Try compressing and check again. */
                    database.getDbEnvironment().lazyCompress(child);
                    if (child.needsSplitting()) {
                        /* Let the finally release child and parent latches. */
                        throw splitRequiredException;
                    }
                }

                /* Continue down a level */
                parent.releaseLatch();
                parent = child;
            } while (!parent.isBIN());

            /* Reached a BIN; parent and child refer to the same node here. */
            success = true;
            return parent;
        } finally {

            /*
             * On any abnormal exit, release the child (if latched) and then
             * the parent, unless both refer to the same node.
             */
            if (!success) {
                if (child != null &&
                    childIsLatched) {
                    child.releaseLatch();
                }
                if (parent != child) {
                    parent.releaseLatch();
                }
            }
        }
    }

    /**
     * Do pre-emptive splitting in the subtree topped by the "parent" node.
     * Search down the tree until we get to the BIN level, and split any nodes
     * that fit the splittable requirement.
     *
     * Note that more than one node in the path may be splittable. For example,
     * a tree might have a level2 IN and a BIN that are both splittable, and
     * would be encountered by the same insert operation.
     *
     * On normal return the returned IN is left latched; on an exception every
     * IN latch taken here is released by the finally clause.  The rootLatch,
     * if it was acquired, is released in both cases.
     *
     * @param parent the IN at the top of the subtree to process.  It is
     * latched by this method, not by the caller.
     * @param key the key that determines the path followed downward.
     * @param cacheMode passed to the latch and split calls made below.
     *
     * @return the parent to use for retrying the search, which may be
     * different than the parent parameter passed if the root IN has been
     * evicted.
     *
     * @throws SplitRequiredException if, once latched, the starting node
     * itself needs splitting or is no longer a root (isRoot() is false).
     */
    private IN forceSplit(IN parent, byte[] key, CacheMode cacheMode)
        throws DatabaseException, SplitRequiredException {

        /* Latched (parent, child, index) triples, ordered from the top down. */
        ArrayList<SplitInfo> nodeLadder = new ArrayList<SplitInfo>();

        /*
         * These stay true only while every step of the descent goes through
         * the first (left) or last (right) slot; they select splitSpecial.
         */
        boolean allLeftSideDescent = true;
        boolean allRightSideDescent = true;
        int index;
        IN child = null;
        IN originalParent = parent;
        ListIterator<SplitInfo> iter = null;

        boolean isRootLatched = false;
        boolean success = false;
        try {

            /*
             * Latch the root in order to update the root LSN when we're done.
             * Latch order must be: root, root IN.  We'll leave this method
             * with the original parent latched.
             *
             * Although we are checking isDbRoot without latching, if it
             * changes (if the root is split) we'll detect this below and throw
             * splitRequiredException.  Note that this property can change from
             * true to false, but never from false to true.
             */
            if (originalParent.isDbRoot()) {
                rootLatch.acquireExclusive();
                isRootLatched = true;
                /* The root IN may have been evicted. [#16173] */
                parent = (IN) root.fetchTarget(database, null);
                originalParent = parent;
            }
            originalParent.latch(cacheMode);

            /*
             * Another thread may have crept in and
             *  - used the last free slot in the parent, making it impossible
             *    to correctly propagate the split.
             *  - actually split the root, in which case we may be looking at
             *    the wrong subtree for this search.
             * If so, throw and retry from above. SR [#11144]
             */
            if (originalParent.needsSplitting() || !originalParent.isRoot()) {
                throw splitRequiredException;
            }

            /*
             * Search downward to the BIN level, saving the information
             * needed to do a split if necessary.
             */
            do {
                if (parent.getNEntries() == 0) {
                    /* No more children, can't descend anymore. */
                    break;
                } else {
                    /* Look for the entry matching key in the current node. */
                    index = parent.findEntry(key, false, false);
                    if (index != 0) {
                        allLeftSideDescent = false;
                    }
                    if (index != (parent.getNEntries() - 1)) {
                        allRightSideDescent = false;
                    }
                }

                assert index >= 0;

                /*
                 * Get the child node that matches. We only need to work on
                 * nodes in residence.
                 */
                child = (IN) parent.getTarget(index);
                if (child == null) {
                    break;
                } else {
                    /* Every node on the ladder is latched exclusively. */
                    child.latch(cacheMode);
                    nodeLadder.add(new SplitInfo(parent, child, index));
                }

                /* Continue down a level */
                parent = child;
            } while (!parent.isBIN());

            boolean startedSplits = false;
            LogManager logManager =
                database.getDbEnvironment().getLogManager();

            /*
             * Process the accumulated nodes from the bottom up. Split each
             * node if required. If the node should not split, we check if
             * there have been any splits on the ladder yet. If there are none,
             * we merely release the node, since there is no update.  If splits
             * have started, we need to propagate new LSNs upward, so we log
             * the node and update its parent.
             *
             * Start this iterator at the end of the list.
             */
            iter = nodeLadder.listIterator(nodeLadder.size());
            /* Node id of the parent involved in the most recent split. */
            long lastParentForSplit = Node.NULL_NODE_ID;
            while (iter.hasPrevious()) {
                SplitInfo info = iter.previous();

                /*
                 * Get rid of current entry on nodeLadder so it doesn't get
                 * unlatched in the finally clause.
                 */
                iter.remove();
                child = info.child;
                parent = info.parent;
                index = info.index;

                /* Opportunistically split the node if it is full. */
                if (child.needsSplitting()) {
                    if (allLeftSideDescent || allRightSideDescent) {
                        child.splitSpecial(parent,
                                           index,
                                           maxTreeEntriesPerNode,
                                           key,
                                           allLeftSideDescent,
                                           cacheMode);
                    } else {
                        child.split(parent, index, maxTreeEntriesPerNode,
                                    cacheMode);
                    }
                    lastParentForSplit = parent.getNodeId();
                    startedSplits = true;

                    /*
                     * If the DB root IN was logged, update the DB tree's child
                     * reference.  Now the MapLN is logically dirty, but the
                     * change hasn't been logged. Set the rootIN to be dirty
                     * again, to force flushing the rootIN and mapLN in the
                     * next checkpoint. Be sure to flush the MapLN
                     * if we ever evict the root.
                     */
                    if (parent.isDbRoot()) {
                        assert isRootLatched;
                        root.setLsn(parent.getLastLoggedVersion());
                        parent.setDirty(true);
                    }
                } else {
                    if (startedSplits) {
                        long newLsn = 0;

                        /*
                         * If this child was the parent of a split, it's
                         * already logged by the split call. We just need to
                         * propagate the logging upwards. If this child is just
                         * a link in the chain upwards, log it.
                         */
                        if (lastParentForSplit == child.getNodeId()) {
                            newLsn = child.getLastLoggedVersion();
                        } else {
                            newLsn = child.optionalLog(logManager);
                        }
                        parent.updateEntry(index, newLsn);
                    }
                }
                child.releaseLatch();
                /* Cleared so the finally clause does not release it again. */
                child = null;
            }
            success = true;
        } finally {
            if (!success) {

                /*
                 * This will only happen if an exception is thrown and we leave
                 * things in an intermediate state.
                 *
                 * NOTE(review): during the downward search above, child also
                 * refers to the entry most recently added to nodeLadder, so
                 * an exception thrown in that phase appears to reach both
                 * releaseLatch calls below for the same node -- confirm, and
                 * consider clearing child once it is on the ladder.
                 */
                if (child != null) {
                    child.releaseLatch();
                }

                /* Release whatever is still on the ladder, bottom up. */
                if (nodeLadder.size() > 0) {
                    iter = nodeLadder.listIterator(nodeLadder.size());
                    while (iter.hasPrevious()) {
                        SplitInfo info = iter.previous();
                        info.child.releaseLatch();
                    }
                }

                originalParent.releaseLatch();
            }

            /* The rootLatch is released on both success and failure. */
            if (isRootLatched) {
                rootLatch.release();
            }
        }
        return originalParent;
    }

    /**
     * Helper to obtain the root IN with shared root latching.  Optionally
     * updates the generation of the root when latching it.
     */
    public IN getRootIN(CacheMode cacheMode)
        throws DatabaseException {

        return getRootINInternal(cacheMode, false/*exclusive*/);
    }

    /**
     * Helper to obtain the root IN with exclusive root latching.  Optionally
     * updates the generation of the root when latching it.
     */
    public IN getRootINLatchedExclusive(CacheMode cacheMode)
        throws DatabaseException {

        return getRootINInternal(cacheMode, true/*exclusive*/);
    }

    private IN getRootINInternal(CacheMode cacheMode, boolean exclusive)
        throws DatabaseException {

        rootLatch.acquireShared();
        IN rootIN = null;
        try {
            if (rootExists()) {
                rootIN = (IN) root.fetchTarget(database, null);
                if (exclusive) {
                    rootIN.latch(cacheMode);
                } else {
                    rootIN.latchShared(cacheMode);
                }
            }
            return rootIN;
        } finally {
            rootLatch.release();
        }
    }

    public IN getResidentRootIN(boolean latched)
        throws DatabaseException {

        IN rootIN = null;
        if (rootExists()) {
            rootIN = (IN) root.getTarget();
            if (rootIN != null && latched) {
                rootIN.latchShared(CacheMode.UNCHANGED);
            }
        }
        return rootIN;
    }

    /**
     * Find the BIN that is relevant to the insert.  If the tree doesn't exist
     * yet, then create the first IN and BIN.  On return, the cursor is set to
     * the BIN that is found or created, and the BIN is latched.
     */
    public void findBinForInsert(final byte[] key, final CursorImpl cursor) {

        /*
         * First try using the BIN at the cursor position to avoid a search.
         *
         * Note that although the cursor has a BIN property that we can try to
         * leverage, the cursor is not added to that BIN.  This is important
         * because when we fall through and do a search, and the BIN needs
         * spliting, we compress to avoid splitting the BIN when it contains
         * slots that are deleted.  If the cursor were added to the BIN,
         * compression would not be possible.
         */
        BIN bin = cursor.latchBIN();
        if (bin != null) {
            if (!bin.needsSplitting() && bin.isKeyInBounds(key)) {
                return;
            } else {
                bin.releaseLatch();
            }
        }

        boolean rootLatchIsHeld = false;
        try {
            long logLsn;

            /*
             * We may have to try several times because of a small
             * timing window, explained below.
             */
            while (true) {
                rootLatchIsHeld = true;
                rootLatch.acquireShared();
                if (!rootExists()) {
                    rootLatch.release();
                    rootLatch.acquireExclusive();
                    if (rootExists()) {
                        rootLatch.release();
                        rootLatchIsHeld = false;
                        continue;
                    }

                    final CacheMode cacheMode = cursor.getCacheMode();
                    final EnvironmentImpl env = database.getDbEnvironment();
                    final LogManager logManager = env.getLogManager();
                    final INList inMemoryINs = env.getInMemoryINs();

                    /*
                     * This is an empty tree, either because it's brand new
                     * tree or because everything in it was deleted. Create an
                     * IN and a BIN.  We could latch the rootIN here, but
                     * there's no reason to since we're just creating the
                     * initial tree and we have the rootLatch held. Log the
                     * nodes as soon as they're created, but remember that
                     * referred-to children must come before any references to
                     * their LSNs.
                     */

                    /* First BIN in the tree, log provisionally right away. */
                    bin = new BIN(database, key, maxTreeEntriesPerNode, 1);
                    bin.latch(cacheMode);
                    logLsn = bin.optionalLogProvisional(logManager, null);

                    /*
                     * Log the root right away. Leave the root dirty, because
                     * the MapLN is not being updated, and we want to avoid
                     * this scenario from [#13897], where the LN has no
                     * possible parent.
                     *  provisional BIN
                     *  root IN
                     *  checkpoint start
                     *  LN is logged
                     *  checkpoint end
                     *  BIN is dirtied, but is not part of checkpoint
                     */

                    IN rootIN =
                        new IN(database, key, maxTreeEntriesPerNode, 2);

                    /*
                     * OK to latch the root after a child BIN because it's
                     * during creation.
                     */
                    rootIN.latch(cacheMode);
                    rootIN.setIsRoot(true);

                    boolean insertOk = rootIN.insertEntry
                        (new ChildReference(bin, key, logLsn));
                    assert insertOk;

                    logLsn = rootIN.optionalLog(logManager);
                    rootIN.setDirty(true)/*force re-logging, see [#13897]*/

                    root = makeRootChildReference(rootIN,
                                                  new byte[0],
                                                  logLsn);

                    rootIN.releaseLatch();

                    /* Add the new nodes to the in memory list. */
                    inMemoryINs.add(bin);
                    inMemoryINs.add(rootIN);
                    rootLatch.release();
                    rootLatchIsHeld = false;

                    break;
                } else {
                    rootLatch.release();
                    rootLatchIsHeld = false;

                    /*
                     * There's a tree here, so search for where we should
                     * insert. However, note that a window exists after we
                     * release the root latch. We release the latch because the
                     * search method expects to take the latch. After the
                     * release and before search, the INCompressor may come in
                     * and delete the entire tree, so search may return with a
                     * null.
                     */
                    IN in = searchSplitsAllowed(key, cursor.getCacheMode(),
                                                null /*searchComparator*/);
                    if (in == null) {
                        /* The tree was deleted by the INCompressor. */
                        continue;
                    } else {
                        /* search() found a BIN where this key belongs. */
                        bin = (BIN) in;
                        break;
                    }
                }
            }
        } finally {
            if (rootLatchIsHeld) {
                rootLatch.release();
            }
        }

        /* testing hook to insert item into log. */
        assert TestHookExecute.doHookIfSet(ckptHook);

        cursor.setBIN(bin);
    }

    /*
     * Given a subtree root (an IN), remove it and all of its children from the
     * in memory IN list. Also count removed nodes as obsolete and gather the
     * set of file summaries that should be logged. The localTracker will be
     * flushed to the log later.
     */
    private void accountForSubtreeRemoval(INList inList,
                                          IN subtreeRoot,
                                          LocalUtilizationTracker localTracker)
        throws DatabaseException {

        subtreeRoot.accountForSubtreeRemoval(inList, localTracker);

        LoggerUtils.envLogMsg(Level.FINE, database.getDbEnvironment(),
                           "SubtreeRemoval: subtreeRoot = " +
                           subtreeRoot.getNodeId());
    }

    /*
     * Logging support
     */

    /**
     * @see Loggable#getLogSize
     */
    public int getLogSize() {
        int size = 1;                          // rootExists
        if (root != null) {
            size += root.getLogSize();
        }
        return size;
    }

    /**
     * @see Loggable#writeToLog
     */
    public void writeToLog(ByteBuffer logBuffer) {
        byte booleans = (byte) ((root != null) ? 1 : 0);
        logBuffer.put(booleans);
        if (root != null) {
            root.writeToLog(logBuffer);
        }
    }

    /**
     * @see Loggable#readFromLog
     */
    public void readFromLog(ByteBuffer itemBuffer, int entryVersion) {
        boolean rootExists = false;
        byte booleans = itemBuffer.get();
        rootExists = (booleans & 1) != 0;
        if (rootExists) {
            root = makeRootChildReference();
            root.readFromLog(itemBuffer, entryVersion);
        }
    }

    /**
     * @see Loggable#dumpLog
     */
    public void dumpLog(StringBuilder sb, boolean verbose) {
        sb.append("<root>");
        if (root != null) {
            root.dumpLog(sb, verbose);
        }
        sb.append("</root>");
    }

    /**
     * @see Loggable#getTransactionId
     */
    public long getTransactionId() {
        return 0;
    }

    /**
     * @see Loggable#logicalEquals
     * Always return false, this item should never be compared.
     */
    public boolean logicalEquals(Loggable other) {
        return false;
    }

    /**
     * rebuildINList is used by recovery to add all the resident nodes to the
     * IN list.
     */
    public void rebuildINList()
        throws DatabaseException {

        INList inMemoryList = database.getDbEnvironment().getInMemoryINs();

        if (root != null) {
            rootLatch.acquireShared();
            try {
                Node rootIN = root.getTarget();
                if (rootIN != null) {
                    rootIN.rebuildINList(inMemoryList);
                }
            } finally {
                rootLatch.release();
            }
        }
    }

    /*
     * Debugging stuff.
     */
    public void dump() {
        System.out.println(dumpString(0));
    }

    public String dumpString(int nSpaces) {
        StringBuilder sb = new StringBuilder();
        sb.append(TreeUtils.indent(nSpaces));
        sb.append("<tree>");
        sb.append('\n');
        if (root != null) {
            sb.append(DbLsn.dumpString(root.getLsn(), nSpaces));
            sb.append('\n');
            IN rootIN = (IN) root.getTarget();
            if (rootIN == null) {
                sb.append("<in/>");
            } else {
                sb.append(rootIN.toString());
            }
            sb.append('\n');
        }
        sb.append(TreeUtils.indent(nSpaces));
        sb.append("</tree>");
        return sb.toString();
    }

    /**
     * Unit test support to validate subtree pruning. Didn't want to make root
     * access public.
     */
    boolean validateDelete(int index)
        throws DatabaseException {

        rootLatch.acquireShared();
        try {
            IN rootIN = (IN) root.fetchTarget(database, null);
            return rootIN.validateSubtreeBeforeDelete(index);
        } finally {
            rootLatch.release();
        }
    }

    /**
     * Debugging check that all resident nodes are on the INList and no stray
     * nodes are present in the unused portion of the IN arrays.
     */
    public void validateINList(IN parent)
        throws DatabaseException {

        if (parent == null) {
            parent = (IN) root.getTarget();
        }
        if (parent != null) {
            INList inList = database.getDbEnvironment().getInMemoryINs();
            if (!inList.contains(parent)) {
                throw EnvironmentFailureException.unexpectedState
                    ("IN " + parent.getNodeId() + " missing from INList");
            }
            for (int i = 0;; i += 1) {
                try {
                    Node node = parent.getTarget(i);
                    if (i >= parent.getNEntries()) {
                        if (node != null) {
                            throw EnvironmentFailureException.unexpectedState
                                ("IN " + parent.getNodeId() +
                                 " has stray node " + node +
                                 " at index " + i);
                        }
                        byte[] key = parent.getKey(i);
                        if (key != null) {
                            throw EnvironmentFailureException.unexpectedState
                                ("IN " + parent.getNodeId() +
                                 " has stray key " + key +
                                 " at index " + i);
                        }
                    }
                    if (node instanceof IN) {
                        validateINList((IN) node);
                    }
                } catch (ArrayIndexOutOfBoundsException e) {
                    break;
                }
            }
        }
    }

    /* For unit testing only. */
    public void setWaitHook(TestHook hook) {
        waitHook = hook;
    }

    /* For unit testing only. */
    public void setSearchHook(TestHook hook) {
        searchHook = hook;
    }

    /* For unit testing only. */
    public void setCkptHook(TestHook hook) {
        ckptHook = hook;
    }

    /**
     * Send trace messages to the java.util.logger. Don't rely on the logger
     * alone to conditionalize whether we send this message, we don't even want
     * to construct the message if the level is not enabled.
     */
    private void traceSplitRoot(Level level,
                                String splitType,
                                IN newRoot,
                                long newRootLsn,
                                IN oldRoot,
                                long oldRootLsn) {
        Logger logger = database.getDbEnvironment().getLogger();
        if (logger.isLoggable(level)) {
            StringBuilder sb = new StringBuilder();
            sb.append(splitType);
            sb.append(" newRoot=").append(newRoot.getNodeId());
            sb.append(" newRootLsn=").
                append(DbLsn.getNoFormatString(newRootLsn));
            sb.append(" oldRoot=").append(oldRoot.getNodeId());
            sb.append(" oldRootLsn=").
                append(DbLsn.getNoFormatString(oldRootLsn));
            LoggerUtils.logMsg
                (logger, database.getDbEnvironment(), level, sb.toString());
        }
    }

    private static class SplitInfo {
        IN parent;
        IN child;
        int index;

        SplitInfo(IN parent, IN child, int index) {
            this.parent = parent;
            this.child = child;
            this.index = index;
        }
    }
}
TOP

Related Classes of com.sleepycat.je.tree.Tree

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.