/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 2002-2006
* Sleepycat Software. All rights reserved.
*
* $Id: Tree.java,v 1.405 2006/01/13 14:44:29 linda Exp $
*/
package com.sleepycat.je.tree;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.ListIterator;
import java.util.logging.Level;
import java.util.logging.Logger;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.cleaner.UtilizationTracker;
import com.sleepycat.je.config.EnvironmentParams;
import com.sleepycat.je.dbi.CursorImpl;
import com.sleepycat.je.dbi.DatabaseImpl;
import com.sleepycat.je.dbi.DbConfigManager;
import com.sleepycat.je.dbi.DbTree;
import com.sleepycat.je.dbi.EnvironmentImpl;
import com.sleepycat.je.dbi.INList;
import com.sleepycat.je.latch.LatchSupport;
import com.sleepycat.je.latch.SharedLatch;
import com.sleepycat.je.log.LogManager;
import com.sleepycat.je.log.LogReadable;
import com.sleepycat.je.log.LogUtils;
import com.sleepycat.je.log.LogWritable;
import com.sleepycat.je.recovery.RecoveryManager;
import com.sleepycat.je.txn.BasicLocker;
import com.sleepycat.je.txn.LockGrantType;
import com.sleepycat.je.txn.LockResult;
import com.sleepycat.je.txn.LockType;
import com.sleepycat.je.txn.Locker;
import com.sleepycat.je.txn.WriteLockInfo;
import com.sleepycat.je.utilint.DbLsn;
import com.sleepycat.je.utilint.TestHook;
import com.sleepycat.je.utilint.TestHookExecute;
import com.sleepycat.je.utilint.Tracer;
/**
* Tree implements the JE B+Tree.
*
* A note on tree search patterns:
* There's a set of Tree.search* methods. Some clients of the tree use
* those search methods directly, whereas other clients of the tree
* tend to use methods built on top of search.
*
* The semantics of search* are
* they leave you pointing at a BIN or IN
* they don't tell you where the reference of interest is.
* they traverse a single tree, to jump into the duplicate tree, the
* caller has to take explicit action.
* The semantics of the get* methods are:
* they leave you pointing at a BIN or IN
* they return the index of the slot of interest
* they traverse down to whatever level is needed -- they'll take care of
* jumping into the duplicate tree.
* they are built on top of search* methods.
* For the future:
* Over time, we need to clarify which methods are to be used by clients
* of the tree. Preferably clients that call the tree use get*, although
* there are cases where they need visibility into the tree structure. For
* example, the cursors use search* because they want to add themselves to
* BIN before jumping into the duplicate tree.
*
* Also, search* should return the location of the slot to save us a
* second binary search.
*/
public final class Tree implements LogWritable, LogReadable {
/* For debug tracing */
private static final String TRACE_ROOT_SPLIT = "RootSplit:";
private static final String TRACE_DUP_ROOT_SPLIT = "DupRootSplit:";
private static final String TRACE_MUTATE = "Mut:";
private static final String TRACE_INSERT = "Ins:";
private static final String TRACE_INSERT_DUPLICATE = "InsD:";
private DatabaseImpl database;
private ChildReference root;
private int maxMainTreeEntriesPerNode;
private int maxDupTreeEntriesPerNode;
private boolean purgeRoot;
/*
* Latch that must be held when using/accessing the root node. Protects
* against the root being changed out from underneath us by splitRoot.
*/
private SharedLatch rootLatch;
private TreeStats treeStats;
private ThreadLocal treeStatsAccumulatorTL = new ThreadLocal();
/*
* We don't need the stack trace on this so always throw a static and
* avoid the cost of Throwable.fillInStackTrace() every time it's thrown.
* [#13354].
*/
private static SplitRequiredException splitRequiredException =
new SplitRequiredException();
/**
 * Type-safe enumeration of the ways a tree descent can be steered.
 * NORMAL is a regular search down the tree. LEFT and RIGHT follow the
 * left or right edge of the tree to reach its first or last node.
 */
public static class SearchType {

    /*
     * Private constructor: the constants declared below are the only
     * search types meant to exist.
     */
    private SearchType() {
    }

    /* Regular search down the tree. */
    public static final SearchType NORMAL = new SearchType();

    /* Search down the left side to find the first node. */
    public static final SearchType LEFT = new SearchType();

    /* Search down the right side to find the last node. */
    public static final SearchType RIGHT = new SearchType();
}
/* For unit tests */
private TestHook waitHook; // used for generating race conditions
private TestHook searchHook; // [#12736]
private TestHook ckptHook; // [#13897]
/**
 * Builds a new tree for the given database.
 *
 * @param database the database this tree belongs to; its node size limits
 * and environment configuration are read while constructing the tree.
 */
public Tree(DatabaseImpl database)
    throws DatabaseException {

    /* Create the root latch and stats, then pull in per-database limits. */
    init(database);
    setDatabase(database);
}
/**
 * Builds a tree that is about to be populated from the log. No database
 * is attached yet, so the per-node entry limits start out at zero.
 */
public Tree()
    throws DatabaseException {

    init(null);
    this.maxMainTreeEntriesPerNode = 0;
    this.maxDupTreeEntriesPerNode = 0;
}
/**
 * Shared constructor logic: creates the root latch and the stats holder,
 * clears the root and remembers the database.
 *
 * @param database the owning database, or null when the tree is being
 * read from the log; in that case the latch is created without an
 * environment.
 */
private void init(DatabaseImpl database) {
    EnvironmentImpl latchEnv = null;
    if (database != null) {
        latchEnv = database.getDbEnvironment();
    }
    this.rootLatch = LatchSupport.makeSharedLatch("RootLatch", latchEnv);
    this.treeStats = new TreeStats();
    this.database = database;
    this.root = null;
}
/**
 * Attaches this tree to a database and caches the settings derived from
 * it: the maximum entries per node for the main and duplicate trees, and
 * the COMPRESSOR_PURGE_ROOT environment setting. Used by recovery when
 * recreating an existing tree.
 *
 * @param database the database that owns this tree.
 */
public void setDatabase(DatabaseImpl database)
    throws DatabaseException {

    this.database = database;
    this.maxMainTreeEntriesPerNode = database.getNodeMaxEntries();
    this.maxDupTreeEntriesPerNode = database.getNodeMaxDupTreeEntries();
    this.purgeRoot =
        database.getDbEnvironment().getConfigManager().getBoolean
        (EnvironmentParams.COMPRESSOR_PURGE_ROOT);
}
/**
 * @return the database that owns this Tree; may be null if no database
 * has been set yet.
 */
public DatabaseImpl getDatabase() {
    return this.database;
}
/**
 * Installs a new root reference. Should only be called while holding the
 * root latch exclusively.
 *
 * @param newRoot the reference that becomes the tree's root.
 * @param notLatched pass true to skip the assertion that the current
 * thread holds the root latch in write mode.
 */
public void setRoot(ChildReference newRoot, boolean notLatched) {
    assert (notLatched || rootLatch.isWriteLockedByCurrentThread());
    this.root = newRoot;
}
/**
 * Creates a root-aware ChildReference for the given target, key and LSN.
 *
 * @return a new RootChildReference wrapping the arguments.
 */
public ChildReference makeRootChildReference(Node target,
                                             byte[] key,
                                             long lsn) {
    ChildReference rootRef = new RootChildReference(target, key, lsn);
    return rootRef;
}
/**
 * Creates an empty root-aware ChildReference.
 *
 * @return a new RootChildReference built with its no-argument constructor.
 */
private ChildReference makeRootChildReference() {
    ChildReference rootRef = new RootChildReference();
    return rootRef;
}
/*
 * ChildReference specialization used for the tree root. fetchTarget() is
 * overridden so that when the target is not resident and the rootLatch is
 * not held exclusively by the current thread, the latch is released and
 * re-acquired in exclusive mode before the fetch. The mutators assert that
 * the rootLatch is write-locked by the current thread.
 */
private class RootChildReference extends ChildReference {

    private RootChildReference() {
        super();
    }

    private RootChildReference(Node target, byte[] key, long lsn) {
        super(target, key, lsn);
    }

    /* Not used. */
    private RootChildReference(Node target,
                               byte[] key,
                               long lsn,
                               byte existingState) {
        super(target, key, lsn, existingState);
    }

    /* Caller is responsible for releasing rootLatch. */
    public Node fetchTarget(DatabaseImpl database, IN in)
        throws DatabaseException {

        /* Only a non-resident target requires the exclusive latch. */
        boolean mustUpgrade =
            (getTarget() == null) &&
            !rootLatch.isWriteLockedByCurrentThread();
        if (mustUpgrade) {
            rootLatch.release();
            rootLatch.acquireExclusive();
        }
        return super.fetchTarget(database, in);
    }

    /* Changing the target requires the root latch in write mode. */
    public void setTarget(Node target) {
        assert rootLatch.isWriteLockedByCurrentThread();
        super.setTarget(target);
    }

    /* Clearing the target requires the root latch in write mode. */
    public void clearTarget() {
        assert rootLatch.isWriteLockedByCurrentThread();
        super.clearTarget();
    }

    /* Changing the LSN requires the root latch in write mode. */
    public void setLsn(long lsn) {
        assert rootLatch.isWriteLockedByCurrentThread();
        super.setLsn(lsn);
    }
}
/**
 * Returns the LSN of the rootIN. The root is read without latching, so
 * this should only be called while the tree is quiescent.
 *
 * @return the root's LSN, or DbLsn.NULL_LSN when the tree has no root.
 */
public long getRootLsn() {
    return (root == null) ? DbLsn.NULL_LSN : root.getLsn();
}
/**
 * @return the TreeStats object created for this tree.
 */
TreeStats getTreeStats() {
    return this.treeStats;
}
/**
 * Looks up the stats accumulator registered for the current thread.
 *
 * @return the thread-local accumulator, or null when the environment's
 * thread-local reference count is not positive (the thread local is not
 * consulted in that case).
 */
private TreeWalkerStatsAccumulator getTreeStatsAccumulator() {
    if (EnvironmentImpl.getThreadLocalReferenceCount() <= 0) {
        return null;
    }
    return (TreeWalkerStatsAccumulator) treeStatsAccumulatorTL.get();
}
/**
 * Registers the stats accumulator for the calling thread.
 *
 * @param tSA the accumulator to store in this tree's thread local.
 */
public void setTreeStatsAccumulator(TreeWalkerStatsAccumulator tSA) {
    this.treeStatsAccumulatorTL.set(tSA);
}
/**
 * Runs the given callback on the current root reference while holding the
 * root latch exclusively. The latch is always released before returning,
 * whether the callback completes normally or throws.
 *
 * @param wrl the work to perform; it is passed the current root reference.
 * @return whatever the callback's doWork method returns.
 */
public IN withRootLatchedExclusive(WithRootLatched wrl)
    throws DatabaseException {

    /*
     * Acquire before entering the try block. If the acquire itself fails
     * the latch is not held, and the finally clause must not attempt to
     * release it (which could mask the original failure).
     */
    rootLatch.acquireExclusive();
    try {
        return wrl.doWork(root);
    } finally {
        rootLatch.release();
    }
}
/**
 * Runs the given callback on the current root reference while holding the
 * root latch in shared mode. The latch is always released before
 * returning, whether the callback completes normally or throws.
 *
 * @param wrl the work to perform; it is passed the current root reference.
 * @return whatever the callback's doWork method returns.
 */
public IN withRootLatchedShared(WithRootLatched wrl)
    throws DatabaseException {

    /*
     * Acquire before entering the try block. If the acquire itself fails
     * the latch is not held, and the finally clause must not attempt to
     * release it (which could mask the original failure).
     */
    rootLatch.acquireShared();
    try {
        return wrl.doWork(root);
    } finally {
        rootLatch.release();
    }
}
/**
* Deletes a BIN specified by key from the tree. If the BIN resides in a
* subtree that can be pruned away, prune as much as possible, so we
* don't leave a branch that has no BINs.
*
* It's possible that the targeted BIN will now have entries, or will
* have resident cursors. Either will prevent deletion.
*
* The root latch is held exclusively, and the rootIN is latched, while the
* deletable subtree is located and detached. Both are released before the
* database root is modified and before the removed subtree is accounted
* for. If the tree has no root, this method returns without doing anything.
*
* @param idKey - the identifier key of the node to delete.
* @param tracker is used for tracking obsolete node info.
*
* @throws NodeNotEmptyException presumably raised by
* searchDeletableSubTree when the target BIN has entries -- not visible
* in this method, confirm against that helper.
* @throws CursorsExistException presumably raised by
* searchDeletableSubTree when cursors reside on the target BIN -- confirm
* against that helper.
*/
public void delete(byte[] idKey,
UtilizationTracker tracker)
throws DatabaseException,
NodeNotEmptyException,
CursorsExistException {
/* Root of the subtree that was detached, or null if nothing was. */
IN subtreeRootIN = null;
/*
* A delete is a reverse split that must be propagated up to the root.
* [#13501] Keep all nodes from the rootIN to the parent of the
* deletable subtree latched as we descend so we can log the
* IN deletion and cascade the logging up the tree. The latched
* nodes are kept in order in the nodeLadder.
*/
ArrayList nodeLadder = new ArrayList();
IN rootIN = null;
boolean rootNeedsUpdating = false;
rootLatch.acquireExclusive();
try {
if (root == null) {
return; // no action, tree is deleted.
}
rootIN = (IN) root.fetchTarget(database, null);
rootIN.latch(false);
searchDeletableSubTree(rootIN, idKey, nodeLadder);
if (nodeLadder.size() == 0) {
/*
* The root is the top of the deletable subtree. Delete the
* whole tree if the purge root je property is set.
* In general, there's no reason to delete this last
* IN->...IN->BIN subtree since we're likely to add more
* nodes to this tree again. Deleting the subtree also
* adds to the space used by the log since a MapLN needs to
* be written when the root is nulled, and a MapLN, IN
* (root), BIN needs to be written when the root is
* recreated.
*
* Consider a queue application which frequently inserts
* and deletes entries and often times leaves the tree
* empty, but will insert new records again.
*
* An optimization might be to prune the multiple IN path
* to the last BIN (if it even exists) to just a root IN
* pointing to the single BIN, but this doesn't feel like
* it's worth the trouble since the extra depth doesn't
* matter all that much.
*/
if (purgeRoot) {
subtreeRootIN = logTreeRemoval(rootIN, tracker);
if (subtreeRootIN != null) {
rootNeedsUpdating = true;
}
}
} else {
/* Detach this subtree. */
SplitInfo detachPoint =
(SplitInfo) nodeLadder.get(nodeLadder.size() - 1);
boolean deleteOk =
detachPoint.parent.deleteEntry(detachPoint.index,
true);
assert deleteOk;
/* Cascade updates upward, including writing the root IN. */
rootNeedsUpdating = cascadeUpdates(nodeLadder, null, -1);
subtreeRootIN = detachPoint.child;
}
} finally {
/*
* Release in this order: ladder latches, then the rootIN latch,
* then the root latch.
*/
releaseNodeLadderLatches(nodeLadder);
if (rootIN != null) {
rootIN.releaseLatch();
}
rootLatch.release();
}
if (subtreeRootIN != null) {
EnvironmentImpl envImpl = database.getDbEnvironment();
if (rootNeedsUpdating) {
/*
* modifyDbRoot will grab locks and we can't have the INList
* latches or root latch held while it tries to acquire locks.
*/
DbTree dbTree = envImpl.getDbMapTree();
dbTree.modifyDbRoot(database);
RecoveryManager.traceRootDeletion(Level.FINE, database);
}
/*
* Count obsolete nodes after logging the delete. We can do
* this without having the nodes of the subtree latched because the
* subtree has been detached from the tree.
*/
INList inList = envImpl.getInMemoryINs();
accountForSubtreeRemoval(inList, subtreeRootIN, tracker);
}
}
/**
 * Releases the latch on the child of every rung left in the node ladder,
 * working from the bottom of the ladder up to the top.
 *
 * @param nodeLadder list of SplitInfo objects in top-down order.
 */
private void releaseNodeLadderLatches(ArrayList nodeLadder)
    throws DatabaseException {

    /* Walk the list backwards so the deepest node is unlatched first. */
    for (int i = nodeLadder.size() - 1; i >= 0; i--) {
        SplitInfo rung = (SplitInfo) nodeLadder.get(i);
        rung.child.releaseLatch();
    }
}
/**
 * This entire tree is empty: clear the root and log an INDeleteInfo entry
 * recording the removal. Must be called with the root latch held in write
 * mode.
 *
 * @param rootIN the current root IN of the tree.
 * @param tracker not used by this method.
 * @return the rootIN that has been detached, or null if there
 * hasn't been any removal.
 */
private IN logTreeRemoval(IN rootIN,
                          UtilizationTracker tracker)
    throws DatabaseException {

    assert rootLatch.isWriteLockedByCurrentThread();

    /*
     * XXX: Suspect that validateSubtree is no longer needed, now that we
     * hold all latches.
     */
    boolean removable =
        (rootIN.getNEntries() <= 1) &&
        (rootIN.validateSubtreeBeforeDelete(0));
    if (!removable) {
        return null;
    }

    root = null;

    /*
     * Record the root deletion for recovery. Do this within the root
     * latch. We need to put this log entry into the log before another
     * thread comes in and creates a new rootIN for this database.
     *
     * For example,
     * LSN 1000 IN delete info entry
     * LSN 1010 new IN, for next set of inserts
     * LSN 1020 new BIN, for next set of inserts.
     *
     * The entry at 1000 is needed so that LSN 1010 will properly
     * supersede all previous IN entries in the tree. Without the
     * INDelete, we may not use the new root, because it has a different
     * node id.
     */
    LogManager logManager = database.getDbEnvironment().getLogManager();
    INDeleteInfo deleteInfo =
        new INDeleteInfo(rootIN.getNodeId(),
                         rootIN.getIdentifierKey(),
                         database.getId());
    logManager.log(deleteInfo);
    return rootIN;
}
/**
 * Propagates a delete upwards through the node ladder. For example,
 * suppose a node ladder holds:
 * INa, INb, index for INb in INa
 * INb, INc, index for INc in INb
 * INc, BINd, index for BINd in INc
 *
 * On entry BINd has already been removed from INc. This method then
 * - logs INc
 * - updates INb with INc's new LSN and logs INb
 * - updates INa with INb's new LSN and logs INa
 * and finally records the LSN of the topmost logged node either in the
 * tree root reference or in the parent BIN of the dup tree.
 *
 * @param nodeLadder List of SplitInfos describing each node pair on the
 * downward path
 * @param binRoot parent of the dup tree, or null if this is not for
 * dups.
 * @param index slot occupied by this din tree.
 * @return whether the DB root needs updating.
 */
private boolean cascadeUpdates(ArrayList nodeLadder,
                               BIN binRoot,
                               int index)
    throws DatabaseException {

    LogManager logManager = database.getDbEnvironment().getLogManager();
    long loggedLsn = DbLsn.NULL_LSN;
    SplitInfo rung = null;

    /* Climb from the bottom rung to the top one. */
    for (int i = nodeLadder.size() - 1; i >= 0; i--) {
        rung = (SplitInfo) nodeLadder.get(i);
        if (loggedLsn != DbLsn.NULL_LSN) {
            /* Point this parent at the freshly logged child. */
            rung.parent.updateEntry(rung.index, loggedLsn);
        }
        loggedLsn = rung.parent.log(logManager);
    }

    if (rung == null) {
        /* Empty ladder: nothing was logged. */
        return false;
    }

    /* We've logged the top of this subtree, record it properly. */
    if (rung.parent.isDbRoot()) {
        /* We updated the rootIN of the database. */
        assert rootLatch.isWriteLockedByCurrentThread();
        root.setLsn(loggedLsn);
        return true;
    }
    if ((binRoot != null) && rung.parent.isRoot()) {
        /* We updated the DIN root of the database. */
        binRoot.updateEntry(index, loggedLsn);
        return false;
    }
    assert false;
    return false;
}
/**
 * Delete a subtree of a duplicate tree. Find the duplicate tree using
 * mainKey in the top part of the tree and idKey in the duplicate tree.
 *
 * If the main tree search finds no node (the search returns null, as it
 * does for an empty tree), or the BIN has no slot for mainKey, nothing is
 * deleted and the method returns normally. The outcome is reported only
 * through exceptions; there is no return value.
 *
 * @param idKey the identifier key to be used in the duplicate subtree to
 * find the duplicate path.
 * @param mainKey the key to be used in the main tree to find the
 * duplicate subtree.
 * @param tracker is used for tracking obsolete node info.
 *
 * @throws CursorsExistException if cursors (or a conflicting lock on the
 * dup count LN) prevent the duplicate subtree from being deleted.
 * @throws NodeNotEmptyException declared by the helper that performs the
 * deletion; presumably raised when the targeted node still has entries.
 */
public void deleteDup(byte[] idKey,
                      byte[] mainKey,
                      UtilizationTracker tracker)
    throws DatabaseException,
           NodeNotEmptyException,
           CursorsExistException {

    /* Find the BIN that is the parent of this duplicate tree. */
    IN in = search(mainKey, SearchType.NORMAL, -1, null,
                   false /*updateGeneration*/);
    if (in == null) {
        /*
         * No node was found, so there is no latch to release and nothing
         * to delete. Mirrors delete(), which is a no-op when the tree
         * has no root.
         */
        return;
    }

    IN deletedSubtreeRoot = null;
    try {
        assert in.isLatchOwner();
        assert in instanceof BIN;
        assert in.getNEntries() > 0;

        /* Find the appropriate entry in this BIN. */
        int index = in.findEntry(mainKey, false, true);
        if (index >= 0) {
            deletedSubtreeRoot = deleteDupSubtree(idKey, (BIN) in, index);
        }
    } finally {
        in.releaseLatch();
    }

    /* Account for the removed nodes once the BIN latch is released. */
    if (deletedSubtreeRoot != null) {
        EnvironmentImpl envImpl = database.getDbEnvironment();
        accountForSubtreeRemoval(envImpl.getInMemoryINs(),
                                 deletedSubtreeRoot, tracker);
    }
}
/**
* Deletes the part of the duplicate tree hanging off bin's slot 'index'
* that leads to idKey, or the whole duplicate tree when its root DIN is
* the top of the deletable subtree.
*
* We enter and leave this method with 'bin' latched. The DIN root is
* latched here and released before returning.
*
* @param idKey identifier key used to find the path within the duplicate
* tree.
* @param bin the latched BIN whose slot refers to the duplicate tree.
* @param index the slot in bin that refers to the duplicate tree.
* @return the root of the subtree we have deleted, so it can be
* properly accounted for. May be null if nothing was deleted.
* @throws CursorsExistException if the read lock on the dup count LN is
* denied, or if bin has cursors when the whole duplicate tree would be
* deleted.
*/
private IN deleteDupSubtree(byte[] idKey,
BIN bin,
int index)
throws DatabaseException,
NodeNotEmptyException,
CursorsExistException {
EnvironmentImpl envImpl = database.getDbEnvironment();
boolean dupCountLNLocked = false;
DupCountLN dcl = null;
BasicLocker locker = new BasicLocker(envImpl);
/* Latch the DIN root. */
DIN duplicateRoot = (DIN) bin.fetchTarget(index);
duplicateRoot.latch(false);
ArrayList nodeLadder = new ArrayList();
IN subtreeRootIN = null;
try {
/*
* Read lock the dup count LN to ascertain whether there are any
* writers in the tree. XXX: This seems unnecessary now, revisit.
*/
ChildReference dclRef = duplicateRoot.getDupCountLNRef();
dcl = (DupCountLN) dclRef.fetchTarget(database, duplicateRoot);
LockResult lockResult = locker.nonBlockingLock(dcl.getNodeId(),
LockType.READ,
database);
if (lockResult.getLockGrant() == LockGrantType.DENIED) {
throw CursorsExistException.CURSORS_EXIST;
} else {
dupCountLNLocked = true;
}
/*
* We don't release the latch on bin before we search the
* duplicate tree below because we might be deleting the whole
* subtree from the IN and we want to keep it latched until we
* know.
*/
searchDeletableSubTree(duplicateRoot, idKey, nodeLadder);
LogManager logManager = envImpl.getLogManager();
if (nodeLadder.size() == 0) {
/* We're deleting the duplicate root. */
if (bin.nCursors() == 0) {
boolean deleteOk = bin.deleteEntry(index, true);
assert deleteOk;
/*
* Use an INDupDeleteInfo to make it clear that
* this duplicate tree has been eradicated. This
* is analogous to deleting a root; we must be sure
* that we can overlay another subtree onto this slot
* at recovery redo.
*/
logManager.log(new INDupDeleteInfo
(duplicateRoot.getNodeId(),
duplicateRoot.getMainTreeKey(),
duplicateRoot.getDupTreeKey(),
database.getId()));
subtreeRootIN = duplicateRoot;
/* An emptied BIN is handed to the compressor queue. */
if (bin.getNEntries() == 0) {
database.getDbEnvironment().
addToCompressorQueue(bin, null, false);
}
} else {
/*
* Cursors prevent us from deleting this dup tree, we'll
* have to retry.
*/
throw CursorsExistException.CURSORS_EXIST;
}
} else {
/* We're deleting a portion of the duplicate tree. */
SplitInfo detachPoint =
(SplitInfo) nodeLadder.get(nodeLadder.size() - 1);
boolean deleteOk =
detachPoint.parent.deleteEntry(detachPoint.index,
true);
assert deleteOk;
/*
* Cascade updates upward, including writing the root
* DIN and parent BIN.
*/
cascadeUpdates(nodeLadder, bin, index);
subtreeRootIN = detachPoint.child;
}
} finally {
/*
* Undo in this order: ladder latches, the dup count LN lock (only
* if it was granted), then the DIN root latch.
*/
releaseNodeLadderLatches(nodeLadder);
if (dupCountLNLocked) {
locker.releaseLock(dcl.getNodeId());
}
if (duplicateRoot != null) {
duplicateRoot.releaseLatch();
}
}
return subtreeRootIN;
}
/**
 * Find the leftmost node (IN or BIN) in the tree. Do not descend into a
 * duplicate tree if the leftmost entry of the first BIN refers to one.
 * The generation of the nodes visited is updated.
 *
 * @return the leftmost node in the tree, null if the tree is empty. The
 * returned node is latched and the caller must release it.
 */
public IN getFirstNode()
    throws DatabaseException {

    IN leftmost = search(null, SearchType.LEFT, -1, null,
                         true /*updateGeneration*/);
    return leftmost;
}
/**
 * Find the rightmost node (IN or BIN) in the tree. Do not descend into a
 * duplicate tree if the rightmost entry of the last BIN refers to one.
 * The generation of the nodes visited is updated.
 *
 * @return the rightmost node in the tree, null if the tree is empty. The
 * returned node is latched and the caller must release it.
 */
public IN getLastNode()
    throws DatabaseException {

    IN rightmost = search(null, SearchType.RIGHT, -1, null,
                          true /*updateGeneration*/);
    return rightmost;
}
/**
 * Find the leftmost node (DBIN) in a duplicate tree.
 *
 * @param dupRoot the root of the duplicate tree; must be non-null and
 * latched by the caller.
 * @return the leftmost node in the tree, null if the tree is empty. The
 * returned node is latched and the caller must release it.
 * @throws IllegalArgumentException if dupRoot is null.
 */
public DBIN getFirstNode(DIN dupRoot)
    throws DatabaseException {

    if (dupRoot == null) {
        throw new IllegalArgumentException
            ("getFirstNode passed null root");
    }
    assert dupRoot.isLatchOwner();

    return (DBIN) searchSubTree(dupRoot, null, SearchType.LEFT, -1, null,
                                true /*updateGeneration*/);
}
/**
 * Find the rightmost node (DBIN) in a duplicate tree.
 *
 * @param dupRoot the root of the duplicate tree; must be non-null and
 * latched by the caller.
 * @return the rightmost node in the tree, null if the tree is empty. The
 * returned node is latched and the caller must release it.
 * @throws IllegalArgumentException if dupRoot is null.
 */
public DBIN getLastNode(DIN dupRoot)
    throws DatabaseException {

    if (dupRoot == null) {
        throw new IllegalArgumentException
            ("getLastNode passed null root");
    }
    assert dupRoot.isLatchOwner();

    return (DBIN) searchSubTree(dupRoot, null, SearchType.RIGHT, -1, null,
                                true /*updateGeneration*/);
}
/**
 * Convenience form of getParentINForChildIN without the optional
 * tracking: delegates with a target level of -1 and no tracking list.
 *
 * @param child the latched child node whose parent is wanted.
 * @param requireExactMatch if true, we must find the exact parent, not a
 * potential parent.
 * @param updateGeneration if true, set the generation count during
 * latching.
 * @return the SearchResult produced by the full search.
 */
public SearchResult getParentINForChildIN(IN child,
                                          boolean requireExactMatch,
                                          boolean updateGeneration)
    throws DatabaseException {

    final int noTargetLevel = -1;
    return getParentINForChildIN(child,
                                 requireExactMatch,
                                 updateGeneration,
                                 noTargetLevel,
                                 null);
}
/**
 * Return a reference to the parent or possible parent of the child. Used
 * by objects that need to take a standalone node and find it in the tree,
 * like the evictor, checkpointer, and recovery.
 *
 * The keys and root flag are read from the child while it is latched; the
 * child's latch is then released and the search is delegated to the
 * node-id based overload with fetching enabled.
 *
 * @param child The child node for which to find the parent. This node is
 * latched by the caller and is released by this function before returning
 * to the caller.
 *
 * @param requireExactMatch if true, we must find the exact parent, not a
 * potential parent.
 *
 * @param updateGeneration if true, set the generation count during
 * latching. Pass false when the LRU should not be impacted, such as
 * during eviction and checkpointing.
 *
 * @param targetLevel passed through unchanged to the node-id based
 * overload.
 *
 * @param trackingList if not null, add the LSNs of the parents visited
 * along the way, as a debug tracing mechanism. This is meant to stay in
 * production, to add information to the log.
 *
 * @return a SearchResult object. If the parent has been found,
 * result.foundExactMatch is true. If any parent, exact or potential has
 * been found, result.parent refers to that node.
 *
 * @throws IllegalArgumentException if child is null.
 */
public SearchResult getParentINForChildIN(IN child,
                                          boolean requireExactMatch,
                                          boolean updateGeneration,
                                          int targetLevel,
                                          List trackingList)
    throws DatabaseException {

    /* Sanity checks */
    if (child == null) {
        throw new IllegalArgumentException("getParentNode passed null");
    }
    assert child.isLatchOwner();

    /* Capture what we need from the child while it is still latched. */
    byte[] childMainKey = child.getMainTreeKey();
    byte[] childDupKey = child.getDupTreeKey();
    boolean childIsRoot = child.isRoot();
    child.releaseLatch();

    long childNodeId = child.getNodeId();
    boolean childHasDups = child.containsDuplicates();
    return getParentINForChildIN(childNodeId,
                                 childHasDups,
                                 childIsRoot,
                                 childMainKey,
                                 childDupKey,
                                 requireExactMatch,
                                 updateGeneration,
                                 targetLevel,
                                 trackingList,
                                 true);
}
/**
 * Return a reference to the parent or possible parent of the child. Used
 * by objects that need to take a node id and find it in the tree,
 * like the evictor, checkpointer, and recovery.
 *
 * The search starts at the root IN and repeatedly asks the current
 * candidate to find the parent until result.keepSearching becomes false.
 * If the tree has no root IN, a freshly constructed SearchResult is
 * returned.
 *
 * @param targetNodeId node id of the child whose parent is wanted.
 *
 * @param targetContainsDuplicates whether the child contains duplicates.
 *
 * @param targetIsRoot whether the child is a root node.
 *
 * @param targetMainTreeKey the child's main tree key.
 *
 * @param targetDupTreeKey the child's duplicate tree key.
 *
 * @param requireExactMatch if true, we must find the exact parent, not a
 * potential parent.
 *
 * @param updateGeneration if true, set the generation count during
 * latching. Pass false when the LRU should not be impacted, such as
 * during eviction and checkpointing.
 *
 * @param targetLevel passed through unchanged to each findParent call.
 *
 * @param trackingList if not null, add the LSNs of the parents visited
 * along the way, as a debug tracing mechanism. This is meant to stay in
 * production, to add information to the log.
 *
 * @param doFetch if false, stop the search if we run into a non-resident
 * child. Used by the checkpointer to avoid conflicting with work done
 * by the evictor.
 *
 * @return a SearchResult object. If the parent has been found,
 * result.foundExactMatch is true. If any parent, exact or potential has
 * been found, result.parent refers to that node.
 *
 * @throws DatabaseException wrapping any exception raised during the
 * descent; the current candidate's latch is released first if this
 * thread owns it.
 */
public SearchResult getParentINForChildIN(long targetNodeId,
                                          boolean targetContainsDuplicates,
                                          boolean targetIsRoot,
                                          byte[] targetMainTreeKey,
                                          byte[] targetDupTreeKey,
                                          boolean requireExactMatch,
                                          boolean updateGeneration,
                                          int targetLevel,
                                          List trackingList,
                                          boolean doFetch)
    throws DatabaseException {

    IN rootIN = getRootIN(updateGeneration);
    SearchResult result = new SearchResult();
    if (rootIN == null) {
        return result;
    }

    /* The tracking list is a permanent tracing aid. */
    if (trackingList != null) {
        trackingList.add(new TrackingInfo(root.getLsn(),
                                          rootIN.getNodeId()));
    }

    IN candidate = rootIN;
    try {
        while (result.keepSearching) {

            /*
             * [12736] Prune away oldBin. Assert has intentional
             * side effect.
             */
            assert TestHookExecute.doHookIfSet(searchHook);

            candidate.findParent(SearchType.NORMAL,
                                 targetNodeId,
                                 targetContainsDuplicates,
                                 targetIsRoot,
                                 targetMainTreeKey,
                                 targetDupTreeKey,
                                 result,
                                 requireExactMatch,
                                 updateGeneration,
                                 targetLevel,
                                 trackingList,
                                 doFetch);
            candidate = result.parent;
        }
    } catch (Exception e) {
        candidate.releaseLatchIfOwner();
        throw new DatabaseException(e);
    }
    return result;
}
/**
* Return a reference to the parent of this LN. This searches through the
* main and duplicate tree and allows splits. Set the tree location to the
* proper BIN parent whether or not the LN child is found. That's because
* if the LN is not found, recovery or abort will need to place it within
* the tree, and so we must point at the appropriate position.
*
* <p>When this method returns with location.bin non-null, the BIN is
* latched and must be unlatched by the caller. Note that location.bin may
* be non-null even if this method returns false.</p>
*
* @param location a holder class to hold state about the location
* of our search. Sort of an internal cursor.
*
* @param mainKey key to navigate through main key
*
* @param dupKey key to navigate through duplicate tree. May be null, since
* deleted lns have no data.
*
* @param ln the node instantiated from the log
*
* @param splitsAllowed true if this method is allowed to cause tree splits
* as a side effect. In practice, recovery can cause splits, but abort
* can't.
*
* @param findDeletedEntries if true, knownDeleted entries are considered:
* the slot lookup is an inexact search and a match is recognized by the
* IN.EXACT_MATCH bit in the returned index. If false, an exact search is
* done, which does not return knownDeleted entries.
*
* @param searchDupTree true if a search through the dup tree looking for
* a match on the ln's node id should be made (only in the case where
* dupKey == null). See SR 8984.
*
* @param updateGeneration if true, set the generation count during
* latching. Pass false when the LRU should not be impacted, such as
* during eviction and checkpointing.
*
* @return true if node found in tree.
* If false is returned and there is the possibility that we can insert
* the record into a plausible parent we must also set
* - location.bin (may be null if no possible parent found)
* - location.lnKey (don't need to set if no possible parent).
*/
public boolean getParentBINForChildLN(TreeLocation location,
byte[] mainKey,
byte[] dupKey,
LN ln,
boolean splitsAllowed,
boolean findDeletedEntries,
boolean searchDupTree,
boolean updateGeneration)
throws DatabaseException {
/*
* Find the BIN that either points to this LN or could be its
* ancestor.
*/
IN searchResult = null;
try {
if (splitsAllowed) {
searchResult = searchSplitsAllowed
(mainKey, -1, updateGeneration);
} else {
searchResult = search
(mainKey, SearchType.NORMAL, -1, null, updateGeneration);
}
location.bin = (BIN) searchResult;
} catch (Exception e) {
/*
* SR 11360 tracing. Any failure (including a failed cast to BIN) is
* rethrown as a DatabaseException describing the node found, if any.
*/
StringBuffer msg = new StringBuffer();
if (searchResult != null) {
searchResult.releaseLatchIfOwner();
msg.append("searchResult=" + searchResult.getClass() +
" nodeId=" + searchResult.getNodeId() +
" nEntries=" + searchResult.getNEntries());
}
throw new DatabaseException(msg.toString(), e);
}
/* No BIN at all: there is no possible parent. */
if (location.bin == null) {
return false;
}
/*
* If caller wants us to consider knownDeleted entries then do an
* inexact search in findEntry since that will find knownDeleted
* entries. If caller doesn't want us to consider knownDeleted entries
* then do an exact search in findEntry since that will not return
* knownDeleted entries.
*/
boolean exactSearch = false;
boolean indicateIfExact = true;
if (!findDeletedEntries) {
exactSearch = true;
indicateIfExact = false;
}
location.index =
location.bin.findEntry(mainKey, indicateIfExact, exactSearch);
boolean match = false;
if (findDeletedEntries) {
/* Inexact search: a match is flagged by the EXACT_MATCH bit. */
match = (location.index >= 0 &&
(location.index & IN.EXACT_MATCH) != 0);
/* Strip the flag so the index can be used as a plain slot number. */
location.index &= ~IN.EXACT_MATCH;
} else {
match = (location.index >= 0);
}
if (match) {
/*
* A BIN parent was found and a slot matches the key. See if
* we have to search further into what may be a dup tree.
*/
if (!location.bin.isEntryKnownDeleted(location.index)) {
/*
* If this database doesn't support duplicates, no point in
* incurring the potentially large cost of fetching in
* the child to check for dup trees. In the future, we could
* optimize further by storing state per slot as to whether
* a dup tree hangs below.
*/
if (database.getSortedDuplicates()) {
Node childNode = location.bin.fetchTarget(location.index);
try {
/*
* Is our target LN a regular record or a dup count?
*/
if (childNode == null) {
/* Child is a deleted cleaned LN. */
} else if (ln.containsDuplicates()) {
/* This is a duplicate count LN. */
return searchDupTreeForDupCountLNParent
(location, mainKey, childNode);
} else {
/*
* This is a regular LN. If this is a dup tree,
* descend and search. If not, we've found the
* parent.
*/
if (childNode.containsDuplicates()) {
if (dupKey == null) {
/*
* We are at a dup tree but our target LN
* has no dup key because it's a deleted
* LN. We've encountered the case of SR
* 8984 where we are searching for an LN
* that was deleted before the conversion
* to a duplicate tree.
*/
return searchDupTreeByNodeId
(location, childNode, ln,
searchDupTree, updateGeneration);
} else {
return searchDupTreeForDBIN
(location, dupKey, (DIN) childNode,
ln, findDeletedEntries,
indicateIfExact, exactSearch,
splitsAllowed, updateGeneration);
}
}
}
} catch (DatabaseException e) {
/* Don't leave the BIN latched when the dup tree search fails. */
location.bin.releaseLatchIfOwner();
throw e;
}
}
}
/* We had a match, we didn't need to search the duplicate tree. */
location.childLsn = location.bin.getLsn(location.index);
return true;
} else {
/* No matching slot: report the key the caller would insert under. */
location.lnKey = mainKey;
return false;
}
}
/**
 * For SR [#8984]: our prospective child is a deleted LN, and we're facing
 * a dup tree. Alas, the deleted LN has no data, and therefore nothing to
 * guide the search in the dup tree. Instead, we search by node id. This
 * is very expensive, but this situation is a very rare case.
 *
 * @param location the search state; on a successful match the BIN it held
 * on entry is unlatched and the BIN it holds afterwards is latched.
 * @param childNode the node at the top of the dup tree to search.
 * @param ln the LN whose node id is being looked for.
 * @param searchDupTree if false no search is made and false is returned.
 * @param updateGeneration passed to the latch call on the matching BIN.
 * @return true if the LN was found by node id.
 */
private boolean searchDupTreeByNodeId(TreeLocation location,
                                      Node childNode,
                                      LN ln,
                                      boolean searchDupTree,
                                      boolean updateGeneration)
    throws DatabaseException {

    if (!searchDupTree) {

        /*
         * This is called from undo() so this LN can
         * just be ignored.
         */
        return false;
    }

    /* Remember the BIN we arrived with; the match may replace it. */
    BIN entryBin = location.bin;
    boolean found = childNode.matchLNByNodeId(location, ln.getNodeId());
    if (!found) {
        return false;
    }

    location.index &= ~IN.EXACT_MATCH;
    if (entryBin != null) {
        entryBin.releaseLatch();
    }
    location.bin.latch(updateGeneration);
    return true;
}
/**
 * Checks whether childNode is the DIN that owns the DupCountLN being
 * searched for. location.lnKey is always set to mainKey; when childNode
 * is a DIN, location.childLsn is set to the LSN of its dup count LN
 * reference.
 *
 * @return true if childNode is the DIN parent of this DupCountLN
 */
private boolean searchDupTreeForDupCountLNParent(TreeLocation location,
                                                 byte[] mainKey,
                                                 Node childNode)
    throws DatabaseException {

    location.lnKey = mainKey;

    if (!(childNode instanceof DIN)) {

        /*
         * If we're looking for a DupCountLN but don't find a
         * duplicate tree, then the key now refers to a single
         * datum. This can happen when all dups for a key are
         * deleted, the compressor runs, and then a single
         * datum is inserted. [#10597]
         */
        return false;
    }

    ChildReference dupCountRef = ((DIN) childNode).getDupCountLNRef();
    location.childLsn = dupCountRef.getLsn();
    return true;
}
/**
* Search the dup tree for the DBIN parent of this ln.
*
* On entry location.bin is the BIN whose slot location.index refers to
* the dup tree. The dup root is latched, split if there is no room for
* inserts, and only then is that BIN unlatched. On normal return
* location.bin is the node found in the dup tree, location.lnKey is
* dupKey and location.index is the slot found for it.
*
* @param location search state, updated as described above.
* @param dupKey key used to navigate the duplicate tree; must not be null.
* @param dupRoot root DIN of the duplicate tree.
* @param ln the LN being looked for; its node id is passed to the subtree
* search.
* @param findDeletedEntries if true, a match is recognized by the
* IN.EXACT_MATCH bit in the index returned by findEntry.
* @param indicateIfExact passed through to findEntry.
* @param exactSearch passed through to findEntry.
* @param splitsAllowed if true the subtree search may split nodes.
* @param updateGeneration passed through to the subtree search.
* @return true if a matching slot was found, in which case
* location.childLsn is set to that slot's LSN.
*/
private boolean searchDupTreeForDBIN(TreeLocation location,
byte[] dupKey,
DIN dupRoot,
LN ln,
boolean findDeletedEntries,
boolean indicateIfExact,
boolean exactSearch,
boolean splitsAllowed,
boolean updateGeneration)
throws DatabaseException {
assert dupKey != null;
dupRoot.latch();
try {
/* Make sure there's room for inserts. */
if (maybeSplitDuplicateRoot(location.bin, location.index)) {
/* The root was split: re-read the dup root from the BIN slot. */
dupRoot = (DIN) location.bin.fetchTarget(location.index);
}
/*
* Wait until after any duplicate root splitting to unlatch the
* bin.
*/
location.bin.releaseLatch();
/*
* The dupKey is going to be the key that represents the LN in this
* BIN parent.
*/
location.lnKey = dupKey;
/* Search the dup tree */
if (splitsAllowed) {
try {
location.bin = (BIN) searchSubTreeSplitsAllowed
(dupRoot, location.lnKey, ln.getNodeId(),
updateGeneration);
} catch (SplitRequiredException e) {
/*
* Shouldn't happen; the only caller of this method which
* allows splits is from recovery, which is single
* threaded.
*/
throw new DatabaseException(e);
}
} else {
location.bin = (BIN) searchSubTree
(dupRoot, location.lnKey, SearchType.NORMAL,
ln.getNodeId(), null, updateGeneration);
}
/* Search for LN w/exact key. */
location.index = location.bin.findEntry
(location.lnKey, indicateIfExact, exactSearch);
boolean match;
if (findDeletedEntries) {
/* Inexact search: a match is flagged by the EXACT_MATCH bit. */
match = (location.index >= 0 &&
(location.index & IN.EXACT_MATCH) != 0);
location.index &= ~IN.EXACT_MATCH;
} else {
match = (location.index >= 0);
}
if (match) {
location.childLsn = location.bin.getLsn(location.index);
return true;
} else {
return false;
}
} catch (DatabaseException e) {
/* Don't leave the dup root latched on failure. */
dupRoot.releaseLatchIfOwner();
throw e;
}
}
/**
 * Return a reference to the adjacent BIN in the forward direction.
 * Delegates to getNextBinInternal with forward set to true.
 *
 * @param bin The BIN to find the next BIN for.  This BIN is latched.
 * @param traverseWithinDupTree if true, only search within the dup tree
 * and return null when the traversal runs out of duplicates.
 *
 * @return The next BIN, or null if there are no more.  The returned node
 * is latched and the caller must release it.
 * NOTE(review): this comment used to say that the argument BIN remains
 * latched when null is returned, but getNextBinInternal asserts that no
 * latches are held when it runs off the root of the main tree, and it
 * releases the node when it reaches the top of a dup tree.  Confirm the
 * latch state callers should expect on a null return.
 */
public BIN getNextBin(BIN bin, boolean traverseWithinDupTree)
    throws DatabaseException {

    final boolean forward = true;
    return getNextBinInternal(traverseWithinDupTree, bin, forward);
}
/**
 * Return a reference to the adjacent BIN in the backward direction.
 * Delegates to getNextBinInternal with forward set to false.
 *
 * @param bin The BIN to find the previous BIN for.  This BIN is latched.
 * @param traverseWithinDupTree if true, only search within the dup tree
 * and return null when the traversal runs out of duplicates.
 *
 * @return The previous BIN, or null if there are no more.  The returned
 * node is latched and the caller must release it.
 * NOTE(review): this comment used to say that the argument bin remains
 * latched when null is returned, but getNextBinInternal asserts that no
 * latches are held when it runs off the root of the main tree, and it
 * releases the node when it reaches the top of a dup tree.  Confirm the
 * latch state callers should expect on a null return.
 */
public BIN getPrevBin(BIN bin, boolean traverseWithinDupTree)
    throws DatabaseException {

    final boolean forward = false;
    return getNextBinInternal(traverseWithinDupTree, bin, forward);
}
/**
* Helper routine for getNextBin and getPrevBin to iterate through BINs.
*
* Starting from bin, repeatedly looks up the parent of the current node.
* At each parent, the slot for the chosen idKey is located and moved one
* position right (forward) or left (!forward). If that position is valid,
* the node in that slot is latched and, unless it is already a BIN, the
* subtree below it is descended along its left (forward) or right
* (!forward) edge to reach a BIN. Otherwise the ascent continues from the
* parent.
*
* @param traverseWithinDupTree if true, the ascent stops (returning null)
* as soon as the current node reports isRoot().
* @param bin the starting BIN. The entry assertion expects exactly one
* latch to be held by the calling thread.
* @param forward true to find the next BIN, false to find the previous
* one.
*
* @return the adjacent BIN, latched, or null if the ascent ran out of
* parents.
*
* @throws DatabaseException after releasing (if owned) the latches on the
* current node, the parent and the sibling being examined.
*/
private BIN getNextBinInternal(boolean traverseWithinDupTree,
BIN bin,
boolean forward)
throws DatabaseException {
/*
* Use the right most key (for a forward progressing cursor) or the
* left most key (for a backward progressing cursor) as the idkey. The
* reason is that the BIN may get split while finding the next BIN so
* it's not safe to take the BIN's identifierKey entry. If the BIN
* gets split, then the right (left) most key will still be on the
* resultant node. The exception to this is that if there are no
* entries, we just use the identifier key.
*/
byte[] idKey = null;
if (bin.getNEntries() == 0) {
idKey = bin.getIdentifierKey();
} else if (forward) {
idKey = bin.getKey(bin.getNEntries() - 1);
} else {
idKey = bin.getKey(0);
}
/* 'next' is the node whose parent is looked up on each ascent step. */
IN next = bin;
assert LatchSupport.countLatchesHeld() == 1:
LatchSupport.latchesHeldToString();
/*
* Ascend the tree until we find a level that still has nodes to the
* right (or left if !forward) of the path that we're on. If we reach
* the root level, we're done. If we're searching within a duplicate
* tree, stay within the tree.
*/
IN parent = null;
IN nextIN = null;
try {
while (true) {
/*
* Move up a level from where we are now and check to see if we
* reached the top of the tree.
*/
SearchResult result = null;
if (!traverseWithinDupTree) {
/* Operating on a regular tree -- get the parent. */
result = getParentINForChildIN
(next, true /* requireExactMatch */,
true /* updateGeneration */);
if (result.exactParentFound) {
parent = result.parent;
} else {
/* We've reached the root of the tree. */
assert (LatchSupport.countLatchesHeld() == 0):
LatchSupport.latchesHeldToString();
return null;
}
} else {
/* This is a duplicate tree, stay within the tree.*/
if (next.isRoot()) {
/* We've reached the top of the dup tree. */
next.releaseLatch();
return null;
} else {
result = getParentINForChildIN
(next, true /* requireExactMatch */,
true /* updateGeneration */);
if (result.exactParentFound) {
parent = result.parent;
} else {
/*
* No exact parent was found for a non-root dup tree node.
* NOTE(review): unlike the main tree branch there is no
* latch-count assertion here -- confirm what is latched.
*/
return null;
}
}
}
/* From here on only the parent is expected to be latched. */
assert (LatchSupport.countLatchesHeld() == 1) :
LatchSupport.latchesHeldToString();
/*
* Figure out which entry we are in the parent. Add (subtract)
* 1 to move to the next (previous) one and check that we're
* still pointing to a valid child. Don't just reuse an index
* from the parent lookup above (getParentINForChildIN), because
* we want to use our explicitly chosen idKey.
*/
int index = parent.findEntry(idKey, false, false);
/* Despite the name, this flag is about slots in 'parent', an upper IN. */
boolean moreEntriesThisBin = false;
if (forward) {
index++;
if (index < parent.getNEntries()) {
moreEntriesThisBin = true;
}
} else {
/* Test before decrementing: slot 0 has nothing to its left. */
if (index > 0) {
moreEntriesThisBin = true;
}
index--;
}
if (moreEntriesThisBin) {
/*
* There are more entries to the right (or left, if !forward)
* of the current path in parent. Get the entry, and then
* descend down the left most (or right most) path to a BIN.
*/
nextIN = (IN) parent.fetchTarget(index);
nextIN.latch();
assert (LatchSupport.countLatchesHeld() == 2):
LatchSupport.latchesHeldToString();
if (nextIN instanceof BIN) {
/* We landed at a leaf (i.e. a BIN). */
parent.releaseLatch();
TreeWalkerStatsAccumulator treeStatsAccumulator =
getTreeStatsAccumulator();
if (treeStatsAccumulator != null) {
nextIN.accumulateStats(treeStatsAccumulator);
}
return (BIN) nextIN;
} else {
/*
* We landed at an IN. Descend down to the appropriate
* leaf (i.e. BIN) node. The parent stays latched until the
* descent has finished.
*/
IN ret = searchSubTree(nextIN, null,
(forward ?
SearchType.LEFT :
SearchType.RIGHT),
-1,
null,
true /*updateGeneration*/);
parent.releaseLatch();
assert LatchSupport.countLatchesHeld() == 1:
LatchSupport.latchesHeldToString();
if (ret instanceof BIN) {
return (BIN) ret;
} else {
throw new InconsistentNodeException
("subtree did not have a BIN for leaf");
}
}
}
/* Nothing beside us at this level; continue the ascent from parent. */
next = parent;
}
} catch (DatabaseException e) {
/* Release whatever this thread still owns before propagating. */
next.releaseLatchIfOwner();
if (parent != null) {
parent.releaseLatchIfOwner();
}
if (nextIN != null) {
nextIN.releaseLatchIfOwner();
}
throw e;
}
}
/**
 * Split the root of the tree.
 *
 * A new root IN is created one level above the current root, the current
 * root is inserted into it as its only child, and the current root is
 * then split with the new root as parent.  The current root is latched for
 * the duration and unlatched on exit; the tree's root reference is
 * repointed at the new root.
 *
 * @throws DatabaseException if fetching, logging or splitting fails.  If
 * logging fails, the old root's isRoot flag is restored before the
 * exception propagates.
 */
private void splitRoot()
    throws DatabaseException {

    EnvironmentImpl env = database.getDbEnvironment();
    LogManager logManager = env.getLogManager();
    INList inMemoryINs = env.getInMemoryINs();

    IN oldRoot = (IN) root.fetchTarget(database, null);
    oldRoot.latch();

    long oldRootLsn = 0;
    long newRootLsn = 0;
    IN newRoot = null;
    try {

        /* Make a new root IN, giving it an id key from the previous root. */
        byte[] rootIdKey = oldRoot.getKey(0);
        newRoot = new IN(database, rootIdKey, maxMainTreeEntriesPerNode,
                         oldRoot.getLevel() + 1);
        newRoot.setIsRoot(true);
        oldRoot.setIsRoot(false);

        /*
         * Make the new root IN point to the old root IN.  Log the old
         * root provisionally, because we modified it so it's not the root
         * anymore, then log the new root.  We are guaranteed to be able
         * to insert entries, since we just made this root.
         */
        try {
            oldRootLsn = oldRoot.logProvisional(logManager, newRoot);
            ChildReference oldRootRef =
                new ChildReference(oldRoot, rootIdKey, oldRootLsn);
            boolean insertOk = newRoot.insertEntry(oldRootRef);
            assert insertOk;
            newRootLsn = newRoot.log(logManager);
        } catch (DatabaseException e) {
            /* Something went wrong when we tried to log; undo the flag. */
            oldRoot.setIsRoot(true);
            throw e;
        }
        inMemoryINs.add(newRoot);

        /*
         * Make the tree's root reference point to this new node.  Now the
         * MapLN is logically dirty, but the change hasn't been logged.
         * Be sure to flush the MapLN if we ever evict the root.
         */
        root.setTarget(newRoot);
        root.setLsn(newRootLsn);

        /* Split the old root, then pick up the new root's latest LSN. */
        oldRoot.split(newRoot, 0, maxMainTreeEntriesPerNode);
        root.setLsn(newRoot.getLastFullVersion());
    } finally {
        oldRoot.releaseLatch();
    }

    treeStats.nRootSplits++;
    traceSplitRoot(Level.FINE, TRACE_ROOT_SPLIT, newRoot, newRootLsn,
                   oldRoot, oldRootLsn);
}
/**
 * Search the tree, starting at the root.  Depending on search type either
 * search using key, or search all the way down the right or left sides.
 * Stop the search either when the bottom of the tree is reached, or a
 * node matching nid is found (see below) in which case that node's parent
 * is returned.
 *
 * Preemptive splitting is not done during the search.
 *
 * @param key - the key to search for, or null if searchType is LEFT or
 * RIGHT.
 *
 * @param searchType - The type of tree search to perform.  NORMAL means
 * we're searching for key in the tree.  LEFT/RIGHT means we're descending
 * down the left or right side, resp.
 * NOTE(review): this comment used to describe a DELETE search type as
 * well, but searchSubTree, which does the work, throws
 * IllegalArgumentException (surfaced wrapped in a DatabaseException) for
 * any type other than NORMAL, LEFT and RIGHT on a non-empty node.
 *
 * @param nid - The nodeid to search for in the tree.  If found, returns
 * its parent.  If the nodeid of the root is passed, null is returned.
 *
 * @param binBoundary - If non-null, information is returned about whether
 * the BIN found is the first or last BIN in the database.
 *
 * @param updateGeneration - passed to searchSubTree for the nodes latched
 * below the root.  The root itself is always obtained with
 * getRootIN(true), regardless of this argument.
 *
 * @return - the Node that matches the criteria, if any.  This is the node
 * that is farthest down the tree with a match.  Returns null if the root
 * is null.  Node is latched (unless it's null) and must be unlatched by
 * the caller.  Only IN's and BIN's are returned, not LN's.  In a NORMAL
 * search, it is the caller's responsibility to do the findEntry() call on
 * the key and BIN to locate the entry that matches key.
 */
public IN search(byte[] key,
                 SearchType searchType,
                 long nid,
                 BINBoundary binBoundary,
                 boolean updateGeneration)
    throws DatabaseException {

    IN rootIN = getRootIN(true /* updateGeneration */);
    if (rootIN == null) {
        /* Empty tree: nothing to descend into. */
        return null;
    }

    return searchSubTree
        (rootIN, key, searchType, nid, binBoundary, updateGeneration);
}
/**
 * Do a key based search, permitting pre-emptive splits.  Returns the
 * target node's parent.
 *
 * Each attempt takes the root latch, splits the root first if it needs
 * splitting (upgrading from a shared to an exclusive root latch to do
 * so), latches the root IN, releases the root latch and then descends
 * with searchSubTreeSplitsAllowed.  If the descent reports that a split
 * is required, the whole attempt is retried.
 *
 * @param key the key to search for.
 * @param nid node id handed to the subtree search.
 * @param updateGeneration handed to the subtree search.  The root IN
 * itself is latched with the no-argument latch() call.
 *
 * @return the node returned by the subtree search, or null if the tree
 * has no root.
 *
 * @throws DatabaseException if fetching, latching, splitting or updating
 * the database root fails.
 */
public IN searchSplitsAllowed(byte[] key,
                              long nid,
                              boolean updateGeneration)
    throws DatabaseException {

    IN insertTarget = null;
    while (insertTarget == null) {
        rootLatch.acquireShared();

        /*
         * rootLatched must mirror whether this thread holds rootLatch at
         * every point where an exception can escape, because the finally
         * clause releases the latch based on this flag alone.  It is
         * therefore cleared right after each release and set only after
         * the matching acquire has returned.
         */
        boolean rootLatched = true;
        boolean rootLatchedExclusive = false;
        IN rootIN = null;
        try {
            while (true) {
                if (root != null) {
                    rootIN = (IN) root.fetchTarget(database, null);

                    /* Check if root needs splitting. */
                    if (rootIN.needsSplitting()) {
                        if (!rootLatchedExclusive) {

                            /*
                             * Trade the shared latch for an exclusive
                             * one, then re-examine the root from scratch
                             * since it may have changed in between.
                             */
                            rootIN = null;
                            rootLatch.release();
                            rootLatched = false;
                            rootLatch.acquireExclusive();
                            rootLatched = true;
                            rootLatchedExclusive = true;
                            continue;
                        }
                        splitRoot();

                        /*
                         * We can't hold any latches while we lock.  If
                         * the root splits again between latch release
                         * and DbTree.db lock, no problem.  The latest
                         * root will still get written out.
                         */
                        rootLatch.release();
                        rootLatched = false;
                        EnvironmentImpl env = database.getDbEnvironment();
                        env.getDbMapTree().modifyDbRoot(database);
                        rootLatch.acquireExclusive();
                        rootLatched = true;
                        rootIN = (IN) root.fetchTarget(database, null);
                    }
                    rootIN.latch();
                }
                break;
            }
        } finally {
            if (rootLatched) {
                rootLatch.release();
            }
        }

        /* Don't loop forever if the root is null. [#13897] */
        if (rootIN == null) {
            break;
        }

        try {
            insertTarget = searchSubTreeSplitsAllowed(rootIN, key, nid,
                                                      updateGeneration);
        } catch (SplitRequiredException e) {

            /*
             * The last slot in the root was used at the point when this
             * thread released the rootIN latch in order to force splits.
             * Retry.  SR [#11147].
             */
            continue;
        }
    }

    return insertTarget;
}
/**
* Searches a portion of the tree starting at parent using key. If during
* the search a node matching a non-null nid argument is found, its parent
* is returned. If searchType is NORMAL, then key must be supplied to
* guide the search. If searchType is LEFT (or RIGHT), then the tree is
* searched down the left (or right) side to find the first (or last) leaf,
* respectively.
*
* Enters with parent latched, assuming it's not null. Exits with the
* return value latched, assuming it's not null.
*
* @param parent - the root of the subtree to start the search at. This
* node should be latched by the caller and will be unlatched prior to
* return, unless it is itself the node that is returned (it has no
* entries, or its child matched nid).
*
* @param key - the key to search for, unless searchType is LEFT or RIGHT,
* in which case it must be null.
*
* @param searchType - NORMAL means search using key and, optionally, nid.
* LEFT means find the first (leftmost) leaf
* RIGHT means find the last (rightmost) leaf
*
* @param nid - The nodeid to search for in the tree. If found, returns
* its parent. If the nodeid of the root is passed, null is returned.
* Pass -1 if no nodeid based search is desired.
*
* @param binBoundary - if non-null, isFirstBin and isLastBin are set to
* true up front and cleared when the descent takes a slot that is not the
* first, or not the last, slot of a node on the path.
*
* @param updateGeneration - passed to the latch call on every child that
* is latched during the descent.
*
* @return - the node matching the argument criteria, or null. The node is
* latched and must be unlatched by the caller. The parent argument and
* any other nodes that are latched during the search are unlatched prior
* to return.
*
* @throws IllegalArgumentException if a key is passed together with a
* LEFT or RIGHT search; this check runs before any latch is released.
* @throws DatabaseException for failures during the descent. Anything
* thrown inside the descent loop that is not already a DatabaseException
* is wrapped in one, after the latches owned on the current parent and
* child have been released.
*/
public IN searchSubTree(IN parent,
byte[] key,
SearchType searchType,
long nid,
BINBoundary binBoundary,
boolean updateGeneration)
throws DatabaseException {
/* Return null if we're passed a null arg. */
if (parent == null) {
return null;
}
if ((searchType == SearchType.LEFT ||
searchType == SearchType.RIGHT) &&
key != null) {
/*
* If caller is asking for a right or left search, they shouldn't
* be passing us a key.
*/
throw new IllegalArgumentException
("searchSubTree passed key and left/right search");
}
assert parent.isLatchOwner();
/* The starting node itself is the one sought: it has no parent here. */
if (parent.getNodeId() == nid) {
parent.releaseLatch();
return null;
}
/* Assume first/last until the descent leaves an edge of the tree. */
if (binBoundary != null) {
binBoundary.isLastBin = true;
binBoundary.isFirstBin = true;
}
int index;
IN child = null;
TreeWalkerStatsAccumulator treeStatsAccumulator =
getTreeStatsAccumulator();
try {
do {
if (treeStatsAccumulator != null) {
parent.accumulateStats(treeStatsAccumulator);
}
if (parent.getNEntries() == 0) {
/* No more children, can't descend anymore. */
return parent;
} else if (searchType == SearchType.NORMAL) {
/* Look for the entry matching key in the current node. */
index = parent.findEntry(key, false, false);
} else if (searchType == SearchType.LEFT) {
/* Left search, always take the 0th entry. */
index = 0;
} else if (searchType == SearchType.RIGHT) {
/* Right search, always take the highest entry. */
index = parent.getNEntries() - 1;
} else {
/* Ends up wrapped in a DatabaseException by the handler below. */
throw new IllegalArgumentException
("Invalid value of searchType: " + searchType);
}
assert index >= 0;
if (binBoundary != null) {
if (index != parent.getNEntries() - 1) {
binBoundary.isLastBin = false;
}
if (index != 0) {
binBoundary.isFirstBin = false;
}
}
/* Get the child node that matches. */
child = (IN) parent.fetchTarget(index);
/* Latch the child before letting go of the parent. */
child.latch(updateGeneration);
if (treeStatsAccumulator != null) {
child.accumulateStats(treeStatsAccumulator);
}
/*
* If this child matches nid, then stop the search and return
* the parent.
*/
if (child.getNodeId() == nid) {
child.releaseLatch();
return parent;
}
parent.releaseLatch();
/* Continue down a level */
parent = child;
} while (!(parent instanceof BIN));
/* parent and child refer to the same BIN at this point. */
return child;
} catch (Throwable t) {
/* Drop whatever latches this thread still owns on the path. */
if (child != null) {
child.releaseLatchIfOwner();
}
parent.releaseLatchIfOwner();
if (t instanceof DatabaseException) {
/* don't re-wrap a DatabaseException; we may need its type. */
throw (DatabaseException) t;
} else {
throw new DatabaseException(t);
}
}
}
/**
 * Search down the tree using a key, but instead of returning the BIN that
 * houses that key, find the point where we can detach a deletable
 * subtree.  A deletable subtree is a branch where each IN has one child,
 * and the bottom BIN has no entries and no resident cursors.  That point
 * can be found by saving a pointer to the lowest node in the path with
 * more than one entry.
 *
 *              INa
 *             /   \
 *          INb     INc
 *           |       |
 *          INd      ..
 *         /   \
 *      INe     ..
 *       |
 *      BINx (suspected of being empty)
 *
 * In this case, we'd like to prune off the subtree headed by INe.  INd
 * is the parent of this deletable subtree.  As we descend, we must keep
 * latches for all the nodes that will be logged.  In this case, we
 * will need to keep INa, INb and INd latched when we return from this
 * method.
 *
 * The method fills nodeLadder with parent/child/index structures.  In
 * this example, the list will hold:
 *    INa/INb/index
 *    INb/INd/index
 *    INd/INe/index
 * Every node is latched, and every node except for the bottom most child
 * (INe) must be logged.
 *
 * If no node on the path has more than one entry, the whole tree would
 * have to be pruned: all ladder latches are released and nodeLadder is
 * left empty.
 *
 * @param parent the latched node to start the descent from; must not be
 * null.
 * @param key the key that guides the descent; must not be null.
 * @param nodeLadder receives one SplitInfo per level descended.
 *
 * @throws NodeNotEmptyException if the BIN reached has entries.
 * @throws CursorsExistException if the BIN reached has cursors on it.
 * In both cases the nodes already added to nodeLadder are still latched
 * when the exception is thrown.
 */
public void searchDeletableSubTree(IN parent,
                                   byte[] key,
                                   ArrayList nodeLadder)
    throws DatabaseException,
           NodeNotEmptyException,
           CursorsExistException {

    assert (parent!=null);
    assert (key!= null);
    assert parent.isLatchOwner();

    IN child = null;

    /* The lowest IN in the path that has multiple entries, if any. */
    IN detachParent = null;

    do {
        if (parent.getNEntries() == 0) {
            break;
        }

        /* Remember if this is the lowest multiple point. */
        if (parent.getNEntries() > 1) {
            detachParent = parent;
        }

        int slot = parent.findEntry(key, false, false);
        assert slot >= 0;

        /* Get the child node that matches and add a rung for it. */
        child = (IN) parent.fetchTarget(slot);
        child.latch(false);
        nodeLadder.add(new SplitInfo(parent, child, slot));

        /* Continue down a level */
        parent = child;
    } while (!(parent instanceof BIN));

    /*
     * See if there is a reason we can't delete this BIN -- i.e. new
     * items have been inserted, or a cursor exists on it.
     */
    if (child instanceof BIN) {
        if (child.getNEntries() != 0) {
            throw NodeNotEmptyException.NODE_NOT_EMPTY;
        }

        /*
         * This case can happen if we are keeping a BIN on an empty
         * cursor as we traverse.
         */
        if (((BIN) child).nCursors() > 0) {
            throw CursorsExistException.CURSORS_EXIST;
        }
    }

    if (detachParent == null) {

        /*
         * We actually have to prune off the entire tree.  Release all
         * latches, and clear the node ladder.
         */
        releaseNodeLadderLatches(nodeLadder);
        nodeLadder.clear();
        return;
    }

    /*
     * Release all nodes up to the pair that holds the detach point.  We
     * won't be needing those nodes, since they'll be pruned and won't
     * need to be updated.  Walk the ladder from the bottom rung upward.
     */
    ListIterator rungs = nodeLadder.listIterator(nodeLadder.size());
    while (rungs.hasPrevious()) {
        SplitInfo rung = (SplitInfo) rungs.previous();
        if (rung.parent == detachParent) {
            break;
        }
        rung.child.releaseLatch();
        rungs.remove();
    }
}
/**
 * Search the portion of the tree starting at the parent, permitting
 * preemptive splits.
 *
 * The subtree is searched until a node that needs a split is met; splits
 * are then forced downward from parent and the search is repeated.
 *
 * @param parent root of the subtree to search; null yields null.
 * @param key the key that guides the search.
 * @param nid node id handed to searchSubTreeUntilSplit.
 * @param updateGeneration handed to searchSubTreeUntilSplit.
 *
 * @return the node returned by searchSubTreeUntilSplit, or null if parent
 * is null.
 *
 * @throws SplitRequiredException propagated from forceSplit when the
 * split cannot be carried out from this parent.
 */
private IN searchSubTreeSplitsAllowed(IN parent,
                                      byte[] key,
                                      long nid,
                                      boolean updateGeneration)
    throws DatabaseException, SplitRequiredException {

    if (parent == null) {
        return null;
    }

    /*
     * Search downward until we hit a node that needs a split.  In that
     * case, retreat to the top of the tree and force splits downward.
     */
    for (;;) {
        try {
            return searchSubTreeUntilSplit
                (parent, key, nid, updateGeneration);
        } catch (SplitRequiredException e) {

            /* SR [#11144]*/
            if (waitHook != null) {
                waitHook.doHook();
            }

            /*
             * ForceSplit may itself throw SplitRequiredException if it
             * finds that the parent doesn't have room to hold an extra
             * entry.  Allow the exception to propagate up to a place
             * where it's safe to split the parent.
             */
            forceSplit(parent, key);
        }
    }
}
/**
 * Search the subtree, but throw an exception when we see a node that has
 * to be split.
 *
 * Enters with parent latched (if non-null).  Returns null, with parent
 * unlatched, if parent itself matches nid.
 *
 * @param parent latched root of the subtree to search; null yields null.
 * @param key the key that guides the descent.
 * @param nid if a child with this node id is met, its parent is returned.
 * @param updateGeneration passed to the latch call on each child.
 *
 * @return the latched node where the descent stopped: a node with no
 * entries, the parent of the node matching nid, or the BIN reached.
 *
 * @throws SplitRequiredException if a child on the path needs splitting;
 * the child and its parent are unlatched before it is thrown.
 */
private IN searchSubTreeUntilSplit(IN parent,
                                   byte[] key,
                                   long nid,
                                   boolean updateGeneration)
    throws DatabaseException, SplitRequiredException {

    /* Return null if we're passed a null arg. */
    if (parent == null) {
        return null;
    }

    assert parent.isLatchOwner();

    if (parent.getNodeId() == nid) {
        parent.releaseLatch();
        return null;
    }

    IN child = null;
    try {
        do {
            /* No more children, can't descend anymore. */
            if (parent.getNEntries() == 0) {
                return parent;
            }

            /* Look for the entry matching key in the current node. */
            int slot = parent.findEntry(key, false, false);
            assert slot >= 0;

            /* Get the child node that matches. */
            child = (IN) parent.fetchTarget(slot);
            child.latch(updateGeneration);

            /* Throw if we need to split. */
            if (child.needsSplitting()) {
                child.releaseLatch();
                parent.releaseLatch();
                throw splitRequiredException;
            }

            /*
             * If this child matches nid, then stop the search and return
             * the parent.
             */
            if (child.getNodeId() == nid) {
                child.releaseLatch();
                return parent;
            }

            /* Continue down a level */
            parent.releaseLatch();
            parent = child;
        } while (!(parent instanceof BIN));

        return parent;
    } catch (DatabaseException e) {
        if (child != null) {
            child.releaseLatchIfOwner();
        }
        parent.releaseLatchIfOwner();
        throw e;
    }
}
/**
* Do pre-emptive splitting in the subtree topped by the "parent" node.
* Search down the tree until we get to the BIN level, and split any nodes
* that fit the splittable requirement.
*
* Note that more than one node in the path may be splittable. For example,
* a tree might have a level2 IN and a BIN that are both splittable, and
* would be encountered by the same insert operation.
*
* The parent is latched by this method. If the method completes normally
* the parent is still latched on return; if it exits with an exception,
* the parent's latch is released if owned. The root latch, when taken, is
* always released before returning.
*
* @param parent the top of the subtree to process.
* @param key the key that selects the path to descend.
*
* @throws SplitRequiredException if, once latched, the parent needs
* splitting itself or no longer reports isRoot().
*/
private void forceSplit(IN parent,
byte[] key)
throws DatabaseException, SplitRequiredException {
/* One SplitInfo (parent, child, index) per level descended. */
ArrayList nodeLadder = new ArrayList();
/* True while every slot taken so far was slot 0 / the last slot. */
boolean allLeftSideDescent = true;
boolean allRightSideDescent = true;
int index;
IN child = null;
IN originalParent = parent;
ListIterator iter = null;
boolean isRootLatched = false;
boolean success = false;
try {
/*
* Latch the root in order to update the root LSN when we're done.
* Latch order must be: root, root IN. We'll leave this method
* with the original parent latched.
*/
if (originalParent.isDbRoot()) {
rootLatch.acquireExclusive();
isRootLatched = true;
}
originalParent.latch();
/*
* Another thread may have crept in and
* - used the last free slot in the parent, making it impossible
* to correctly propagate the split.
* - actually split the root, in which case we may be looking at
* the wrong subtree for this search.
* If so, throw and retry from above. SR [#11144]
*/
if (originalParent.needsSplitting() || !originalParent.isRoot()) {
throw splitRequiredException;
}
/*
* Search downward to the BIN level, saving the information
* needed to do a split if necessary.
*/
do {
if (parent.getNEntries() == 0) {
/* No more children, can't descend anymore. */
break;
} else {
/* Look for the entry matching key in the current node. */
index = parent.findEntry(key, false, false);
if (index != 0) {
allLeftSideDescent = false;
}
if (index != (parent.getNEntries() - 1)) {
allRightSideDescent = false;
}
}
assert index >= 0;
/*
* Get the child node that matches. We only need to work on
* nodes in residence, so getTarget is used and a null result
* ends the descent.
*/
child = (IN) parent.getTarget(index);
if (child == null) {
break;
} else {
child.latch();
nodeLadder.add(new SplitInfo(parent, child, index));
}
/* Continue down a level */
parent = child;
} while (!(parent instanceof BIN));
boolean startedSplits = false;
LogManager logManager =
database.getDbEnvironment().getLogManager();
/*
* Process the accumulated nodes from the bottom up. Split each
* node if required. If the node should not split, we check if
* there have been any splits on the ladder yet. If there are none,
* we merely release the node, since there is no update. If splits
* have started, we need to propagate new LSNs upward, so we log
* the node and update its parent.
*
* Start this iterator at the end of the list.
*/
iter = nodeLadder.listIterator(nodeLadder.size());
/* Node id of the parent used in the most recent split, or -1. */
long lastParentForSplit = -1;
while (iter.hasPrevious()) {
SplitInfo info = (SplitInfo) iter.previous();
child = info.child;
parent = info.parent;
index = info.index;
/* Opportunistically split the node if it is full. */
if (child.needsSplitting()) {
int maxEntriesPerNode = (child.containsDuplicates() ?
maxDupTreeEntriesPerNode :
maxMainTreeEntriesPerNode);
/* A pure left-edge or right-edge descent uses splitSpecial. */
if (allLeftSideDescent || allRightSideDescent) {
child.splitSpecial(parent,
index,
maxEntriesPerNode,
key,
allLeftSideDescent);
} else {
child.split(parent, index, maxEntriesPerNode);
}
lastParentForSplit = parent.getNodeId();
startedSplits = true;
/*
* If the DB root IN was logged, update the DB tree's child
* reference. Now the MapLN is logically dirty, but the
* change hasn't been logged. Set the rootIN to be dirty
* again, to force flushing the rootIN and mapLN in the
* next checkpoint. Be sure to flush the MapLN
* if we ever evict the root.
*/
if (parent.isDbRoot()) {
assert isRootLatched;
root.setLsn(parent.getLastFullVersion());
parent.setDirty(true);
}
} else {
if (startedSplits) {
long newLsn = 0;
/*
* If this child was the parent of a split, it's
* already logged by the split call. We just need to
* propagate the logging upwards. If this child is just
* a link in the chain upwards, log it.
*/
if (lastParentForSplit == child.getNodeId()) {
newLsn = child.getLastFullVersion();
} else {
newLsn = child.log(logManager);
}
parent.updateEntry(index, newLsn);
}
}
/* This rung is done: unlatch it and drop it from the ladder. */
child.releaseLatch();
child = null;
iter.remove();
}
success = true;
} finally {
/* On failure, also let go of the node in hand and the parent. */
if (!success) {
if (child != null) {
child.releaseLatchIfOwner();
}
originalParent.releaseLatchIfOwner();
}
/*
* Unlatch any remaining children. There should only be remainders
* in the event of an exception.
*/
if (nodeLadder.size() > 0) {
iter = nodeLadder.listIterator(nodeLadder.size());
while (iter.hasPrevious()) {
SplitInfo info = (SplitInfo) iter.previous();
info.child.releaseLatchIfOwner();
}
}
if (isRootLatched) {
rootLatch.release();
}
}
}
/**
 * Helper to obtain the root IN with proper root latching.  The root latch
 * is held in shared mode while the root IN is fetched and latched, and is
 * released before returning.
 *
 * @param updateGeneration passed to the latch call on the root IN.
 *
 * @return the latched root IN, or null if the tree has no root.
 */
public IN getRootIN(boolean updateGeneration)
    throws DatabaseException {

    rootLatch.acquireShared();
    try {
        if (root == null) {
            return null;
        }

        IN rootIN = (IN) root.fetchTarget(database, null);
        rootIN.latch(updateGeneration);
        return rootIN;
    } finally {
        rootLatch.release();
    }
}
/**
* Inserts a new LN into the tree.
*
* Three outcomes are handled once the target BIN has been found: the key
* goes into a fresh slot; the key's slot holds a deleted or cleaned LN and
* is reused; or the slot is live, in which case the work is handed to
* insertDuplicate.
*
* @param ln The LN to insert into the tree.
* @param key Key value for the node
* @param allowDuplicates whether to allow duplicates to be inserted
* @param cursor the cursor to update to point to the newly inserted
* key/data pair. Older documentation allowed null here, but this method
* calls cursor.setBIN and cursor.releaseBIN unconditionally, so a non-null
* cursor is required.
* @param lnLock the lock result for the new LN; its abort LSN is set here
* when a slot is inserted or reused, and it is passed on to
* insertDuplicate otherwise.
* @return true if LN was inserted, false if it was a duplicate
* duplicate (presumably an entry that already exists with the same key
* and data -- confirm against insertDuplicate) or if an attempt was made
* to insert a duplicate when allowDuplicates was false.
*/
public boolean insert(LN ln,
byte[] key,
boolean allowDuplicates,
CursorImpl cursor,
LockResult lnLock)
throws DatabaseException {
validateInsertArgs(allowDuplicates);
EnvironmentImpl env = database.getDbEnvironment();
LogManager logManager = env.getLogManager();
INList inMemoryINs = env.getInMemoryINs();
/* Find and latch the relevant BIN. */
BIN bin = null;
try {
bin = findBinForInsert(key, logManager, inMemoryINs, cursor);
assert bin.isLatchOwner();
/* Make a child reference as a candidate for insertion. */
ChildReference newLNRef =
new ChildReference(ln, key, DbLsn.NULL_LSN);
/*
* If we're doing a put that is not a putCurrent, then the cursor
* passed in may not be pointing to BIN (it was set to the BIN that
* the search landed on which may be different than BIN). Set the
* BIN correctly here so that adjustCursorsForInsert doesn't blow
* an assertion. We'll finish the job by setting the index below.
*/
cursor.setBIN(bin);
/* The result carries the slot index plus status bits. */
int index = bin.insertEntry1(newLNRef);
if ((index & IN.INSERT_SUCCESS) != 0) {
/*
* Update the cursor to point to the entry that has been
* successfully inserted.
*/
index &= ~IN.INSERT_SUCCESS;
cursor.updateBin(bin, index);
/* Log the new LN. */
long newLsn = DbLsn.NULL_LSN;
try {
newLsn = ln.log
(env, database.getId(), key, DbLsn.NULL_LSN,
cursor.getLocker());
} finally {
if (newLsn == DbLsn.NULL_LSN) {
/*
* Possible buffer overflow, out-of-memory, or I/O
* exception during logging. The BIN entry will
* contain a NULL_LSN. To prevent an exception during
* a fetch, we set the KnownDeleted flag. We do not
* call BIN.deleteEntry because cursors will not be
* adjusted. We do not add this entry to the
* compressor queue to avoid complexity (this is rare).
* [13126, 12605, 11271]
*/
bin.setKnownDeleted(index);
}
}
lnLock.setAbortLsn(DbLsn.NULL_LSN, true, true);
bin.updateEntry(index, newLsn);
traceInsert(Level.FINER, env, bin, ln, newLsn, index);
return true;
} else {
/*
* Entry may have been a duplicate. Insertion was not
* successful.
*/
index &= ~IN.EXACT_MATCH;
cursor.updateBin(bin, index);
LN currentLN = null;
boolean isDup = false;
/* A null or LN target is a plain slot; anything else is a dup tree. */
Node n = bin.fetchTarget(index);
if (n == null || n instanceof LN) {
currentLN = (LN) n;
} else {
isDup = true;
}
/* If an LN is present, lock it and check deleted-ness. */
boolean isDeleted = false;
LockResult currentLock = null;
if (!isDup) {
if (currentLN == null) {
/* The LN was cleaned. */
isDeleted = true;
} else {
currentLock = cursor.lockLNDeletedAllowed
(currentLN, LockType.WRITE);
currentLN = currentLock.getLN();
/* The BIN/index may have changed while locking. */
bin = cursor.getBIN();
index = cursor.getIndex();
if (cursor.getDupBIN() != null) {
/*
* A dup tree appeared during locking. We will
* position to a different dup tree entry later in
* insertDuplicate, so we must remove the cursor
* from this dup tree entry. This is a rare case
* so performance is not an issue.
*/
cursor.clearDupBIN(true /*alreadyLatched*/);
isDup = true;
} else if (bin.isEntryKnownDeleted(index) ||
currentLN == null ||
currentLN.isDeleted()) {
/* The current LN is deleted/cleaned. */
isDeleted = true;
}
}
}
if (isDeleted) {
/*
* Set the abort LSN to that of the lock held on the
* current LN, if the current LN was previously locked by
* this txn. This is needed when we change the node ID of
* this slot.
*
* If reusing a slot with a deleted LN deleted in a prior
* transaction (the LockGrantType is NEW or UPGRADE),
* always set abortKnownDeleted=true. It may be that the
* existing slot is PENDING_DELETED, but we restore to
* KNOWN_DELETED in the event of an abort.
*/
long abortLsn = bin.getLsn(index);
boolean abortKnownDeleted = true;
/* currentLock is only consulted when currentLN is non-null. */
if (currentLN != null &&
currentLock.getLockGrant() == LockGrantType.EXISTING) {
long nodeId = currentLN.getNodeId();
Locker locker = cursor.getLocker();
WriteLockInfo info = locker.getWriteLockInfo(nodeId);
abortLsn = info.getAbortLsn();
abortKnownDeleted = info.getAbortKnownDeleted();
}
lnLock.setAbortLsn(abortLsn, abortKnownDeleted);
/*
* Current entry is a deleted entry. Replace it with LN.
* Pass NULL_LSN for the oldLsn parameter of the log()
* method because the old LN was counted obsolete when it
* was deleted.
*/
long newLsn = ln.log(env, database.getId(),
key, DbLsn.NULL_LSN,
cursor.getLocker());
bin.updateEntry(index, ln, newLsn, key);
bin.clearKnownDeleted(index);
bin.clearPendingDeleted(index);
traceInsert(Level.FINER, env, bin, ln, newLsn, index);
return true;
} else {
/*
* Attempt to insert a duplicate in an existing dup tree
* or create a dup tree if none exists.
*/
return insertDuplicate
(key, bin, ln, logManager, inMemoryINs, cursor, lnLock,
allowDuplicates);
}
}
} finally {
/* Runs on every exit path, including exceptions. */
cursor.releaseBIN();
}
}
/**
* Attempts to insert a duplicate at the current cursor BIN position. If
* a dup tree already exists, insert into it; otherwise, create a new
* dup tree and place the new LN and the existing LN into it. If the
* current BIN entry contains an LN, the caller guarantees that it is not
* deleted.
*
* @param key the main tree key; it is passed to the LN log calls, to
* createDuplicateTree and to incrementDuplicateCount.
* @param bin the BIN whose slot at cursor.getIndex() is examined.
* @param newLN the LN to insert; newLN.getData() is used as the key
* within the duplicate tree.
* @param logManager passed through to createDuplicateTree.
* @param inMemoryINs passed through to createDuplicateTree.
* @param cursor supplies the index and locker, and is repositioned on the
* DBIN entry via updateDBin.
* @param lnLock the lock result on which the abort LSN information for
* the insert is recorded.
* @param allowDuplicates if false, the insert is refused when a
* non-deleted record is already present for the key.
*
* @return true if duplicate inserted successfully, false if it was a
* duplicate duplicate, false if there is an existing LN and
* allowDuplicates is false.
*/
private boolean insertDuplicate(byte[] key,
BIN bin,
LN newLN,
LogManager logManager,
INList inMemoryINs,
CursorImpl cursor,
LockResult lnLock,
boolean allowDuplicates)
throws DatabaseException {
EnvironmentImpl env = database.getDbEnvironment();
int index = cursor.getIndex();
boolean successfulInsert = false;
DIN dupRoot = null;
Node n = bin.fetchTarget(index);
/* Capture the BIN's node id up front; it is only used for tracing. */
long binNid = bin.getNodeId();
if (n instanceof DIN) {
DBIN dupBin = null;
/*
* A duplicate tree exists. Find the relevant DBIN and insert the
* new entry into it.
*/
try {
dupRoot = (DIN) n;
dupRoot.latch();
/* Lock the DupCountLN before logging any LNs. */
LockResult dclLockResult =
cursor.lockDupCountLN(dupRoot, LockType.WRITE);
/* The BIN/index may have changed during locking. */
bin = cursor.getBIN();
index = cursor.getIndex();
/*
* Do not proceed if duplicates are not allowed and there are
* one or more duplicates already present. Note that if the
* dup count is zero, we allow the insert.
*/
if (!allowDuplicates) {
DupCountLN dcl = (DupCountLN) dclLockResult.getLN();
if (dcl.getDupCount() > 0) {
return false;
}
}
/*
* Split the dup root if necessary. The dup root may have
* changed during locking above or by the split, so refetch it.
* In either case it will be latched.
*/
maybeSplitDuplicateRoot(bin, index);
dupRoot = (DIN) bin.fetchTarget(index);
/*
* Search the duplicate tree for the right place to insert this
* new record. Releases the latch on duplicateRoot. If the
* duplicateRoot got logged as a result of some splitting,
* update the BIN's LSN information. The SortedLSNTreeWalker
* relies on accurate LSNs in the in-memory tree.
*/
byte[] newLNKey = newLN.getData();
long previousLsn = dupRoot.getLastFullVersion();
try {
dupBin = (DBIN) searchSubTreeSplitsAllowed
(dupRoot, newLNKey, -1, true /*updateGeneration*/);
} catch (SplitRequiredException e) {
/*
* Shouldn't happen -- we have the DIN in the root of the
* dup tree latched during this insert, so there should be
* no possibility of being unable to insert a new entry
* into the DIN root of the dup tree.
*/
throw new DatabaseException(e) ;
}
/* A changed last-full-version LSN means the root was logged. */
long currentLsn = dupRoot.getLastFullVersion();
if (currentLsn != previousLsn) {
bin.updateEntry(index, currentLsn);
}
/* Release the BIN latch to increase concurrency. */
cursor.releaseBIN();
bin = null;
/* The search above released the dup root latch. */
dupRoot = null;
/*
* Try to insert a new reference object. If successful, we'll
* log the LN and update the LSN in the reference.
*/
ChildReference newLNRef =
new ChildReference(newLN, newLNKey, DbLsn.NULL_LSN);
/*
* The returned int carries status flags (IN.INSERT_SUCCESS or
* IN.EXACT_MATCH) that are masked off below to get the index.
*/
int dupIndex = dupBin.insertEntry1(newLNRef);
if ((dupIndex & IN.INSERT_SUCCESS) != 0) {
/*
* Update the cursor to point to the entry that has been
* successfully inserted.
*/
dupIndex &= ~IN.INSERT_SUCCESS;
cursor.updateDBin(dupBin, dupIndex);
/* Log the new LN. */
long newLsn = DbLsn.NULL_LSN;
try {
newLsn = newLN.log
(env, database.getId(), key, DbLsn.NULL_LSN,
cursor.getLocker());
} finally {
/* newLsn is still NULL_LSN only if log() threw. */
if (newLsn == DbLsn.NULL_LSN) {
/* See Tree.insert for an explanation. */
dupBin.setKnownDeleted(dupIndex);
}
}
lnLock.setAbortLsn(DbLsn.NULL_LSN, true, true);
dupBin.setLsn(dupIndex, newLsn);
traceInsertDuplicate(Level.FINER,
database.getDbEnvironment(),
dupBin, newLN, newLsn, binNid);
successfulInsert = true;
} else {
/*
* The insert was not successful. Either this is a
* duplicate duplicate or there is an existing entry but
* that entry is deleted.
*/
dupIndex &= ~IN.EXACT_MATCH;
cursor.updateDBin(dupBin, dupIndex);
LN currentLN = (LN) dupBin.fetchTarget(dupIndex);
/* If an LN is present, lock it and check deleted-ness. */
boolean isDeleted = false;
LockResult currentLock = null;
if (currentLN == null) {
/* The LN was cleaned. */
isDeleted = true;
} else {
currentLock = cursor.lockLNDeletedAllowed
(currentLN, LockType.WRITE);
currentLN = currentLock.getLN();
/* The DBIN/index may have changed while locking. */
dupBin = cursor.getDupBIN();
dupIndex = cursor.getDupIndex();
if (dupBin.isEntryKnownDeleted(dupIndex) ||
currentLN == null ||
currentLN.isDeleted()) {
/* The current LN is deleted/cleaned. */
isDeleted = true;
}
}
if (isDeleted) {
/* See Tree.insert for an explanation. */
long abortLsn = dupBin.getLsn(dupIndex);
boolean abortKnownDeleted = true;
/*
* currentLock is non-null whenever currentLN is non-null
* here, because currentLN is only re-assigned from it.
*/
if (currentLN != null &&
currentLock.getLockGrant() ==
LockGrantType.EXISTING) {
long nodeId = currentLN.getNodeId();
Locker locker = cursor.getLocker();
WriteLockInfo info =
locker.getWriteLockInfo(nodeId);
abortLsn = info.getAbortLsn();
abortKnownDeleted = info.getAbortKnownDeleted();
}
lnLock.setAbortLsn(abortLsn, abortKnownDeleted);
/*
* Current entry is a deleted entry. Replace it with
* LN. Pass NULL_LSN for the oldLsn parameter of the
* log() method because the old LN was counted obsolete
* when it was deleted.
*/
long newLsn =
newLN.log(env, database.getId(), key,
DbLsn.NULL_LSN, cursor.getLocker());
dupBin.updateEntry(dupIndex, newLN, newLsn, newLNKey);
dupBin.clearKnownDeleted(dupIndex);
dupBin.clearPendingDeleted(dupIndex);
traceInsertDuplicate(Level.FINER,
database.getDbEnvironment(),
dupBin, newLN, newLsn, binNid);
successfulInsert = true;
} else {
/* Duplicate duplicate. */
successfulInsert = false;
}
}
/*
* To avoid latching out of order (up the tree), release the
* DBIN latch before latching the BIN and dup root.
*/
dupBin.releaseLatch();
dupBin = null;
if (successfulInsert) {
/* Re-latch from the top to bump the duplicate count. */
cursor.latchBIN();
dupRoot =
cursor.getLatchedDupRoot(false /*isDBINLatched*/);
cursor.releaseBIN();
dupRoot.incrementDuplicateCount
(dclLockResult, key, cursor.getLocker(),
true /*increment*/);
}
} finally {
/*
* dupBin and dupRoot are set to null above once their latch has
* been released, so a non-null reference here may still be
* latched (early return, exception, or the re-latched dup root).
*/
if (dupBin != null) {
dupBin.releaseLatchIfOwner();
}
if (dupRoot != null) {
dupRoot.releaseLatchIfOwner();
}
}
} else if (n instanceof LN) {
/*
* There is no duplicate tree yet. The existing LN is guaranteed
* to be non-deleted, so to insert we must create a dup tree.
*/
if (!allowDuplicates) {
return false;
}
/*
* Mutate the current BIN/LN pair into a BIN/DupCountLN/DIN/DBIN/LN
* duplicate tree. Log the new entries.
*/
try {
lnLock.setAbortLsn(DbLsn.NULL_LSN, true, true);
dupRoot = createDuplicateTree
(key, logManager, inMemoryINs, newLN, cursor);
} finally {
/*
* createDuplicateTree returns a latched DIN on success and null
* for a duplicate duplicate; dupRoot also stays null if it threw.
*/
if (dupRoot != null) {
dupRoot.releaseLatch();
successfulInsert = true;
} else {
successfulInsert = false;
}
}
} else {
throw new InconsistentNodeException
("neither LN or DIN found in BIN");
}
return successfulInsert;
}
/**
* Check if the duplicate root needs to be split. The current duplicate
* root is latched. Exit with the new root (even if it's unchanged)
* latched and the old root (unless the root is unchanged) unlatched.
*
* @param bin the BIN containing the duplicate root.
* @param index the index of the duplicate root in bin.
* @return true if the duplicate root was split.
* @throws DatabaseException if logging either root fails; in that case the
* old root is marked as root again before the exception is rethrown.
*/
private boolean maybeSplitDuplicateRoot(BIN bin,
int index)
throws DatabaseException {
DIN curRoot = (DIN) bin.fetchTarget(index);
if (curRoot.needsSplitting()) {
EnvironmentImpl env = database.getDbEnvironment();
LogManager logManager = env.getLogManager();
INList inMemoryINs = env.getInMemoryINs();
/*
* Make a new root DIN, giving it an id key from the previous root.
* The new root takes over the old root's dup key and DupCountLN
* reference and sits one level above it.
*/
byte[] rootIdKey = curRoot.getKey(0);
DIN newRoot = new DIN(database,
rootIdKey,
maxDupTreeEntriesPerNode,
curRoot.getDupKey(),
curRoot.getDupCountLNRef(),
curRoot.getLevel() + 1);
newRoot.latch();
/* LSNs of the two logged roots; reported by the trace call below. */
long curRootLsn = 0;
long logLsn = 0;
try {
newRoot.setIsRoot(true);
curRoot.setDupCountLN(null);
curRoot.setIsRoot(false);
/*
* Make the new root DIN point to the old root DIN, and then
* log. We should be able to insert into the root because the
* root is newly created.
*/
try {
curRootLsn = curRoot.logProvisional(logManager, newRoot);
boolean insertOk = newRoot.insertEntry
(new ChildReference(curRoot, rootIdKey,
bin.getLsn(index)));
assert insertOk;
logLsn = newRoot.log(logManager);
} catch (DatabaseException e) {
/* Something went wrong when we tried to log. */
/*
* NOTE(review): only the root flag is restored here; the
* DupCountLN cleared above is not put back and newRoot's
* latch is not released on this path -- confirm intended.
*/
curRoot.setIsRoot(true);
throw e;
}
/* Publish the new root in the INList and in the parent BIN slot. */
inMemoryINs.add(newRoot);
bin.updateEntry(index, newRoot, logLsn);
curRoot.split(newRoot, 0, maxDupTreeEntriesPerNode);
} finally {
/* The old root is always unlatched; newRoot stays latched. */
curRoot.releaseLatch();
}
traceSplitRoot(Level.FINE, TRACE_DUP_ROOT_SPLIT,
newRoot, logLsn, curRoot, curRootLsn);
return true;
} else {
return false;
}
}
/**
 * Convert an existing BIN entry from a single (non-duplicate) LN to a new
 * DIN/DupCountLN->DBIN->LN subtree.
 *
 * @param key the key of the entry which will become the duplicate key
 * for the duplicate subtree.
 * @param logManager the logManager
 * @param inMemoryINs the in memory IN list
 * @param newLN the new record to be inserted
 * @param cursor points to the target position for this new dup tree.
 * @return the new duplicate subtree root (a DIN).  It is latched
 * when it is returned and the caller should unlatch it.  If new entry
 * to be inserted is a duplicate of the existing LN, null is returned.
 * @throws DatabaseException if logging any of the new nodes fails; the
 * latches on the new DIN and DBIN are released before rethrowing.
 */
private DIN createDuplicateTree(byte[] key,
                                LogManager logManager,
                                INList inMemoryINs,
                                LN newLN,
                                CursorImpl cursor)
    throws DatabaseException {

    EnvironmentImpl env = database.getDbEnvironment();
    DIN dupRoot = null;
    DBIN dupBin = null;
    BIN bin = cursor.getBIN();
    int index = cursor.getIndex();

    /*
     * fetchTarget returned an LN before this method was called, and we're
     * still latched, so the target should never be null here.  Check that
     * assumption before the LN is dereferenced for the first time.
     */
    LN existingLN = (LN) bin.fetchTarget(index);
    assert existingLN != null;
    boolean existingLNIsDeleted = bin.isEntryKnownDeleted(index) ||
        existingLN.isDeleted();

    byte[] existingKey = existingLN.getData();
    byte[] newLNKey = newLN.getData();

    /* Check for duplicate duplicates. */
    boolean keysEqual = Key.compareKeys
        (newLNKey, existingKey, database.getDuplicateComparator()) == 0;
    if (keysEqual) {
        return null;
    }

    /*
     * Replace the existing LN with a duplicate tree.
     *
     * Once we create a dup tree, we don't revert back to the LN.  Create
     * a DupCountLN to hold the count for this dup tree. Since we don't
     * roll back the internal nodes of a duplicate tree, we need to create
     * a pre-transaction version of the DupCountLN. This version must hold
     * a count of either 0 or 1, depending on whether the current
     * transaction created the existing LN or not. If the former, the count
     * must roll back to 0, if the latter, the count must roll back to 1.
     *
     * Note that we are logging a sequence of nodes and must make sure the
     * log can be correctly recovered even if the entire sequence doesn't
     * make it to the log. We need to make all children provisional to the
     * DIN. This works:
     *
     * Entry 1: (provisional) DupCountLN (first version)
     * Entry 2: (provisional) DupBIN
     * Entry 3: DIN
     * Entry 4: DupCountLN (second version, incorporating the new count.
     *           This can't be provisional because we need to possibly
     *           roll it back.)
     * Entry 5: new LN.
     * See [SR #10203] for a description of the bug that existed before
     * this change.
     */

    /* Create the first version of DupCountLN and log it. (Entry 1). */
    Locker locker = cursor.getLocker();
    long nodeId = existingLN.getNodeId();

    /*
     * If the existing entry is known to be deleted or was created by this
     * transaction, then the DCL should get rolled back to 0, not 1.
     * [13726].
     */
    int startingCount =
        (locker.createdNode(nodeId) ||
         existingLNIsDeleted ||
         locker.getWriteLockInfo(nodeId).getAbortKnownDeleted()) ?
        0 : 1;

    DupCountLN dupCountLN = new DupCountLN(startingCount);
    long firstDupCountLNLsn =
        dupCountLN.logProvisional(env, database.getId(),
                                  key, DbLsn.NULL_LSN);

    /* Make the duplicate root and DBIN. */
    dupRoot = new DIN(database,
                      existingKey,                   // idkey
                      maxDupTreeEntriesPerNode,
                      key,                           // dup key
                      new ChildReference
                      (dupCountLN, key, firstDupCountLNLsn),
                      2);                            // level
    dupRoot.latch();
    dupRoot.setIsRoot(true);

    dupBin = new DBIN(database,
                      existingKey,                   // idkey
                      maxDupTreeEntriesPerNode,
                      key,                           // dup key
                      1);                            // level
    dupBin.latch();

    /*
     * Attach the existing LN child to the duplicate BIN. Since this is a
     * newly created BIN, insertEntry will be successful.
     */
    ChildReference newExistingLNRef = new ChildReference
        (existingLN, existingKey, bin.getLsn(index), bin.getState(index));

    boolean insertOk = dupBin.insertEntry(newExistingLNRef);
    assert insertOk;

    try {

        /* Entry 2: DBIN. */
        long dbinLsn = dupBin.logProvisional(logManager, dupRoot);
        inMemoryINs.add(dupBin);

        /* Attach the duplicate BIN to the duplicate IN root. */
        dupRoot.setEntry(0, dupBin, dupBin.getKey(0),
                         dbinLsn, dupBin.getState(0));

        /* Entry 3:  DIN */
        long dinLsn = dupRoot.log(logManager);
        inMemoryINs.add(dupRoot);

        /*
         * Now that the DIN is logged, we've created a duplicate tree that
         * holds the single, preexisting LN. We can safely create the non
         * provisional LNs that pertain to this insert -- the new LN and
         * the new DupCountLN.
         *
         * We request a lock while holding latches which is usually
         * forbidden, but safe in this case since we know it will be
         * immediately granted (we just created dupCountLN above).
         */
        LockResult lockResult = locker.lock
            (dupCountLN.getNodeId(), LockType.WRITE, false /*noWait*/,
             database);
        lockResult.setAbortLsn(firstDupCountLNLsn, false);

        /* Entry 4: second DupCountLN version, counting both records. */
        dupCountLN.setDupCount(2);
        long dupCountLsn = dupCountLN.log(env, database.getId(), key,
                                          firstDupCountLNLsn, locker);
        dupRoot.updateDupCountLNRef(dupCountLsn);

        /* Add the newly created LN. (Entry 5). */
        long newLsn =
            newLN.log(env, database.getId(), key, DbLsn.NULL_LSN, locker);
        int dupIndex = dupBin.insertEntry1
            (new ChildReference(newLN, newLNKey, newLsn));
        dupIndex &= ~IN.INSERT_SUCCESS;
        cursor.updateDBin(dupBin, dupIndex);

        /*
         * Adjust any cursors positioned on the mutated BIN entry to point
         * to the DBIN at the location of the entry we moved there.  The
         * index of the moved entry is 1 or 0, the XOR of the index of the
         * new entry.
         */
        bin.adjustCursorsForMutation(index, dupBin, dupIndex ^ 1, cursor);
        dupBin.releaseLatch();

        /*
         * Update the "regular" BIN to point to the new duplicate tree
         * instead of the existing LN.  Clear the MIGRATE flag since it
         * applies only to the original LN.
         */
        bin.updateEntry(index, dupRoot, dinLsn);
        bin.setMigrate(index, false);

        traceMutate(Level.FINE, bin, existingLN, newLN, newLsn,
                    dupCountLN, dupCountLsn, dupRoot, dinLsn,
                    dupBin, dbinLsn);
    } catch (DatabaseException e) {

        /*
         * Strictly speaking, not necessary to release latches, because if
         * we fail to log the entries, we just throw them away, but our
         * unit tests check for 0 latches held in the event of a logging
         * error.
         */
        dupBin.releaseLatchIfOwner();
        dupRoot.releaseLatchIfOwner();
        throw e;
    }
    return dupRoot;
}
/**
 * Sanity-checks the arguments given to insert.  The only rule enforced at
 * present: a caller may ask for duplicates only when the database itself
 * is configured for sorted duplicates.
 *
 * @param allowDuplicates whether the caller wants duplicates permitted.
 * @throws DatabaseException if duplicates are requested on a database
 * that does not support them.
 */
private void validateInsertArgs(boolean allowDuplicates)
    throws DatabaseException {

    /* Nothing to verify unless duplicates were asked for. */
    if (!allowDuplicates || database.getSortedDuplicates()) {
        return;
    }

    final String complaint =
        "allowDuplicates passed to insert but database doesn't " +
        "have allow duplicates set.";
    throw new DatabaseException(complaint);
}
/**
* Find the BIN that is relevant to the insert. If the tree doesn't exist
* yet, then create the first IN and BIN.
*
* @param key the key to be inserted; used to check the cursor's BIN
* bounds, to search the tree, and as the id key of a newly created
* root IN and BIN.
* @param logManager used to log the first BIN and root IN when the tree
* is created here.
* @param inMemoryINs the IN list to which newly created nodes are added.
* @param cursor its current BIN, if any, is tried first to avoid a search.
* @return the BIN that was found or created and return it latched.
*/
private BIN findBinForInsert(byte[] key,
LogManager logManager,
INList inMemoryINs,
CursorImpl cursor)
throws DatabaseException {
BIN bin;
/* First try using the BIN at the cursor position to avoid a search. */
bin = cursor.latchBIN();
if (bin != null) {
if (!bin.needsSplitting() && bin.isKeyInBounds(key)) {
return bin;
} else {
bin.releaseLatch();
}
}
/*
* Tracks whether the finally block below must release the root latch.
* NOTE(review): the flag is set to true before acquireShared() and
* stays true across the release/acquireExclusive upgrade, so a failed
* acquisition would lead to a release of a latch that is not held --
* confirm whether the acquire calls can throw.
*/
boolean rootLatchIsHeld = false;
try {
long logLsn;
/*
* We may have to try several times because of a small
* timing window, explained below.
*/
while (true) {
rootLatchIsHeld = true;
rootLatch.acquireShared();
if (root == null) {
/*
* Trade the shared latch for an exclusive one, then re-check
* root since it may have been set while no latch was held.
*/
rootLatch.release();
rootLatch.acquireExclusive();
if (root != null) {
rootLatch.release();
rootLatchIsHeld = false;
continue;
}
/*
* This is an empty tree, either because it's brand new
* tree or because everything in it was deleted. Create an
* IN and a BIN. We could latch the rootIN here, but
* there's no reason to since we're just creating the
* initial tree and we have the rootLatch held. Log the
* nodes as soon as they're created, but remember that
* referred-to children must come before any references to
* their LSNs.
*/
/* First BIN in the tree, log provisionally right away. */
bin = new BIN(database, key, maxMainTreeEntriesPerNode, 1);
bin.latch();
logLsn = bin.logProvisional(logManager, null);
/*
* Log the root right away. Leave the root dirty, because
* the MapLN is not being updated, and we want to avoid
* this scenario from [#13897], where the LN has no
* possible parent.
* provisional BIN
* root IN
* checkpoint start
* LN is logged
* checkpoint end
* BIN is dirtied, but is not part of checkpoint
*/
IN rootIN =
new IN(database, key, maxMainTreeEntriesPerNode, 2);
rootIN.setIsRoot(true);
boolean insertOk = rootIN.insertEntry
(new ChildReference(bin, key, logLsn));
assert insertOk;
/* logLsn is reused: from here on it is the root IN's LSN. */
logLsn = rootIN.log(logManager);
rootIN.setDirty(true); /*force re-logging, see [#13897]*/
root = new ChildReference(rootIN,
new byte[0],
logLsn);
/* Add the new nodes to the in memory list. */
inMemoryINs.add(bin);
inMemoryINs.add(rootIN);
rootLatch.release();
rootLatchIsHeld = false;
break;
} else {
rootLatch.release();
rootLatchIsHeld = false;
/*
* There's a tree here, so search for where we should
* insert. However, note that a window exists after we
* release the root latch. We release the latch because the
* search method expects to take the latch. After the
* release and before search, the INCompressor may come in
* and delete the entire tree, so search may return with a
* null.
*/
IN in = searchSplitsAllowed
(key, -1, true /*updateGeneration*/);
if (in == null) {
/* The tree was deleted by the INCompressor. */
continue;
} else {
/* search() found a BIN where this key belongs. */
bin = (BIN) in;
break;
}
}
}
} finally {
/* Only reached with the flag set if an exception escaped the loop. */
if (rootLatchIsHeld) {
rootLatch.release();
}
}
/* testing hook to insert item into log. */
if (ckptHook != null) {
ckptHook.doHook();
}
return bin;
}
/*
 * Removes the subtree rooted at subtreeRoot, and every node below it, from
 * the in memory IN list by delegating to the root node while the list's
 * major latch is held.  The removed nodes are counted obsolete in the
 * supplied tracker, which is flushed to the log later.  A FINE trace
 * message naming the subtree root is emitted afterwards.
 */
private void accountForSubtreeRemoval(INList inList,
                                      IN subtreeRoot,
                                      UtilizationTracker tracker)
    throws DatabaseException {

    inList.latchMajor();
    try {
        subtreeRoot.accountForSubtreeRemoval(inList, tracker);
    } finally {
        inList.releaseMajorLatch();
    }

    final EnvironmentImpl traceEnv = database.getDbEnvironment();
    final String traceMsg =
        "SubtreeRemoval: subtreeRoot = " + subtreeRoot.getNodeId();
    Tracer.trace(Level.FINE, traceEnv, traceMsg);
}
/*
* Logging support
*/
/**
 * Computes the number of bytes this tree occupies in the log: one boolean
 * recording whether a root exists, followed by the root reference itself
 * when there is one.
 *
 * @see LogWritable#getLogSize
 */
public int getLogSize() {
    final int rootExistsFlagSize = LogUtils.getBooleanLogSize();
    return (root == null) ?
        rootExistsFlagSize :
        rootExistsFlagSize + root.getLogSize();
}
/**
 * Serializes the tree: a boolean saying whether a root exists, then the
 * root reference if it does.
 *
 * @see LogWritable#writeToLog
 */
public void writeToLog(ByteBuffer logBuffer) {
    final boolean rootExists = (root != null);
    LogUtils.writeBoolean(logBuffer, rootExists);
    if (!rootExists) {
        return;
    }
    root.writeToLog(logBuffer);
}
/**
 * Restores the tree from the log: reads the root-exists boolean and, only
 * when it is set, creates a fresh root reference and has it read itself
 * from the buffer.
 *
 * @see LogReadable#readFromLog
 */
public void readFromLog(ByteBuffer itemBuffer, byte entryTypeVersion) {
    if (!LogUtils.readBoolean(itemBuffer)) {
        /* No root was logged; leave the root field untouched. */
        return;
    }
    root = makeRootChildReference();
    root.readFromLog(itemBuffer, entryTypeVersion);
}
/**
 * Appends a {@code <root>...</root>} element to the buffer; the element
 * body is the root reference's own dump, or empty when there is no root.
 *
 * @see LogReadable#dumpLog
 */
public void dumpLog(StringBuffer sb, boolean verbose) {
    final String openTag = "<root>";
    final String closeTag = "</root>";

    sb.append(openTag);
    if (root != null) {
        root.dumpLog(sb, verbose);
    }
    sb.append(closeTag);
}
/**
 * Reports whether this log entry is transactional.  For the tree the
 * answer is always false.
 *
 * @see LogReadable#isTransactional
 */
public boolean logEntryIsTransactional() {
    final boolean transactional = false;
    return transactional;
}
/**
 * Returns the transaction id for this log entry, which is always zero
 * for the tree.
 *
 * @see LogReadable#getTransactionId
 */
public long getTransactionId() {
    final long noTransactionId = 0L;
    return noTransactionId;
}
/**
 * Used by recovery to put every resident node back on the IN list.  With
 * the root latch held in shared mode, the resident root node (if any) is
 * asked to add itself and its descendants to the environment's list.
 */
public void rebuildINList()
    throws DatabaseException {

    final INList residentNodes =
        database.getDbEnvironment().getInMemoryINs();

    /* An empty tree contributes nothing. */
    if (root == null) {
        return;
    }

    rootLatch.acquireShared();
    try {
        final Node residentRoot = root.getTarget();
        if (residentRoot != null) {
            residentRoot.rebuildINList(residentNodes);
        }
    } finally {
        rootLatch.release();
    }
}
/*
* Debugging stuff.
*/
/**
 * Prints the tree dump, rendered with no indentation, to standard out.
 */
public void dump()
    throws DatabaseException {

    final String rendered = dumpString(0);
    System.out.println(rendered);
}
/**
 * Renders the tree as a {@code <tree>...</tree>} element.  When a root
 * reference exists, its LSN and the resident root node (or
 * {@code <in/>} if the node is not resident) are included.
 *
 * @param nSpaces indentation applied to the opening and closing tags and
 * passed to the LSN dump.
 * @return the rendered text.
 */
public String dumpString(int nSpaces)
    throws DatabaseException {

    final StringBuffer out = new StringBuffer();
    out.append(TreeUtils.indent(nSpaces)).append("<tree>").append('\n');

    if (root != null) {
        out.append(DbLsn.dumpString(root.getLsn(), nSpaces)).append('\n');

        final IN residentRoot = (IN) root.getTarget();
        final String rootText =
            (residentRoot == null) ? "<in/>" : residentRoot.toString();
        out.append(rootText).append('\n');
    }

    out.append(TreeUtils.indent(nSpaces)).append("</tree>");
    return out.toString();
}
/**
 * Unit test support for validating subtree pruning; exists so that root
 * access need not be made public.  Fetches the root IN under the shared
 * root latch and delegates the check to it.
 *
 * @param index forwarded to the root IN's validateSubtreeBeforeDelete.
 * @return whatever the root IN's validation reports.
 */
boolean validateDelete(int index)
    throws DatabaseException {

    rootLatch.acquireShared();
    try {
        final IN rootNode = (IN) root.fetchTarget(database, null);
        final boolean verdict = rootNode.validateSubtreeBeforeDelete(index);
        return verdict;
    } finally {
        rootLatch.release();
    }
}
/**
 * Debugging check that all resident nodes are on the INList and no stray
 * nodes are present in the unused portion of the IN arrays.
 *
 * @param parent the IN at which to start validating; if null, validation
 * starts at the resident root.  Nothing is checked when the tree has no
 * root or the root is not resident.
 * @throws DatabaseException if an IN is missing from the INList, or if a
 * node or key is found in a slot at or beyond the IN's entry count.
 */
public void validateINList(IN parent)
    throws DatabaseException {

    if (parent == null) {
        /* An empty tree has no root reference, so there is nothing to do. */
        if (root == null) {
            return;
        }
        parent = (IN) root.getTarget();
    }
    if (parent == null) {
        /* The root is not resident. */
        return;
    }

    INList inList = database.getDbEnvironment().getInMemoryINs();
    if (!inList.getINs().contains(parent)) {
        throw new DatabaseException
            ("IN " + parent.getNodeId() + " missing from INList");
    }

    /*
     * Walk every slot of the underlying arrays, including the unused
     * ones past getNEntries().  The loop ends when indexing past the
     * array capacity raises ArrayIndexOutOfBoundsException.
     */
    for (int i = 0;; i += 1) {
        try {
            Node node = parent.getTarget(i);
            if (i >= parent.getNEntries()) {
                if (node != null) {
                    throw new DatabaseException
                        ("IN " + parent.getNodeId() +
                         " has stray node " + node.getNodeId() +
                         " at index " + i);
                }
                byte[] key = parent.getKey(i);
                if (key != null) {
                    /* Print the key bytes, not the array's identity. */
                    throw new DatabaseException
                        ("IN " + parent.getNodeId() +
                         " has stray key " +
                         java.util.Arrays.toString(key) +
                         " at index " + i);
                }
            }
            if (node instanceof IN) {
                validateINList((IN) node);
            }
        } catch (ArrayIndexOutOfBoundsException e) {
            break;
        }
    }
}
/**
 * For unit testing only: installs the wait hook.
 *
 * @param hook the hook to store.
 */
public void setWaitHook(TestHook hook) {
    this.waitHook = hook;
}
/**
 * For unit testing only: installs the search hook.
 *
 * @param hook the hook to store.
 */
public void setSearchHook(TestHook hook) {
    this.searchHook = hook;
}
/**
 * For unit testing only: installs the checkpoint hook.
 *
 * @param hook the hook to store.
 */
public void setCkptHook(TestHook hook) {
    this.ckptHook = hook;
}
/**
 * Emits a root-split trace record through java.util.logging.  The level
 * is tested first so that the message text is never assembled when it
 * would be discarded.
 */
private void traceSplitRoot(Level level,
                            String splitType,
                            IN newRoot,
                            long newRootLsn,
                            IN oldRoot,
                            long oldRootLsn) {

    final Logger traceLog = database.getDbEnvironment().getLogger();
    if (!traceLog.isLoggable(level)) {
        return;
    }

    final StringBuffer msg = new StringBuffer();
    msg.append(splitType)
       .append(" newRoot=").append(newRoot.getNodeId())
       .append(" newRootLsn=")
       .append(DbLsn.getNoFormatString(newRootLsn))
       .append(" oldRoot=").append(oldRoot.getNodeId())
       .append(" oldRootLsn=")
       .append(DbLsn.getNoFormatString(oldRootLsn));
    traceLog.log(level, msg.toString());
}
/**
 * Emits a trace record describing the mutation of a BIN entry into a
 * duplicate tree.  The level is tested first so that the message text is
 * never assembled when it would be discarded.
 */
private void traceMutate(Level level,
                         BIN theBin,
                         LN existingLn,
                         LN newLn,
                         long newLsn,
                         DupCountLN dupCountLN,
                         long dupRootLsn,
                         DIN dupRoot,
                         long ddinLsn,
                         DBIN dupBin,
                         long dbinLsn) {

    final Logger traceLog = database.getDbEnvironment().getLogger();
    if (!traceLog.isLoggable(level)) {
        return;
    }

    final StringBuffer msg = new StringBuffer();
    msg.append(TRACE_MUTATE)
       .append(" existingLn=").append(existingLn.getNodeId())
       .append(" newLn=").append(newLn.getNodeId())
       .append(" newLnLsn=").append(DbLsn.getNoFormatString(newLsn))
       .append(" dupCountLN=").append(dupCountLN.getNodeId())
       .append(" dupRootLsn=")
       .append(DbLsn.getNoFormatString(dupRootLsn))
       .append(" rootdin=").append(dupRoot.getNodeId())
       .append(" ddinLsn=").append(DbLsn.getNoFormatString(ddinLsn))
       .append(" dbin=").append(dupBin.getNodeId())
       .append(" dbinLsn=").append(DbLsn.getNoFormatString(dbinLsn))
       .append(" bin=").append(theBin.getNodeId());
    traceLog.log(level, msg.toString());
}
/**
 * Emits a trace record for an LN inserted into a BIN.  The level is
 * tested first so that the message text is never assembled when it would
 * be discarded.
 */
private void traceInsert(Level level,
                         EnvironmentImpl env,
                         BIN insertingBin,
                         LN ln,
                         long lnLsn,
                         int index) {

    final Logger traceLog = env.getLogger();
    if (!traceLog.isLoggable(level)) {
        return;
    }

    final StringBuffer msg = new StringBuffer();
    msg.append(TRACE_INSERT)
       .append(" bin=").append(insertingBin.getNodeId())
       .append(" ln=").append(ln.getNodeId())
       .append(" lnLsn=").append(DbLsn.getNoFormatString(lnLsn))
       .append(" index=").append(index);
    traceLog.log(level, msg.toString());
}
/**
 * Emits a trace record for an LN inserted into a DBIN of a duplicate
 * tree.  The level is tested first so that the message text is never
 * assembled when it would be discarded.
 */
private void traceInsertDuplicate(Level level,
                                  EnvironmentImpl env,
                                  BIN insertingDBin,
                                  LN ln,
                                  long lnLsn,
                                  long binNid) {

    final Logger traceLog = env.getLogger();
    if (!traceLog.isLoggable(level)) {
        return;
    }

    final StringBuffer msg = new StringBuffer();
    msg.append(TRACE_INSERT_DUPLICATE)
       .append(" dbin=").append(insertingDBin.getNodeId())
       .append(" bin=").append(binNid)
       .append(" ln=").append(ln.getNodeId())
       .append(" lnLsn=").append(DbLsn.getNoFormatString(lnLsn));
    traceLog.log(level, msg.toString());
}
/**
 * Plain holder that groups a parent IN, one of its children and an index.
 * NOTE(review): presumably index is the child's slot within the parent --
 * confirm against the code that builds these.
 */
private static class SplitInfo {

    /* The parent node recorded for this entry. */
    IN parent;

    /* The child node recorded for this entry. */
    IN child;

    /* The index recorded alongside parent and child. */
    int index;

    /** Stores the three values as given. */
    SplitInfo(IN parent, IN child, int index) {
        this.index = index;
        this.child = child;
        this.parent = parent;
    }
}
}