
Source Code of$DeltaINEntry

* See the file LICENSE for redistribution information.
* Copyright (c) 2002, 2011 Oracle and/or its affiliates.  All rights reserved.


import java.util.HashMap;
import java.util.List;
import java.util.Map;


* SortedLSNTreeWalker uses ordered disk access rather than random access to
* iterate over a database tree. Faulting in data records by on-disk order can
* provide much improved performance over faulting in by key order, since the
* latter may require random access.  SortedLSN walking does not obey cursor
* and locking constraints, and therefore can only be guaranteed consistent for
* a quiescent tree which is not being modified by user or daemon threads.
* The class walks over the tree using sorted LSN fetching for parts of the
* tree that are not in memory. It returns LSNs for each node in the tree,
* <b>except</b> the root IN, in an arbitrary order (i.e. not key
* order). The caller is responsible for getting the root IN's LSN explicitly.
* <p>
* A callback function specified in the constructor is executed for each LSN
* found.
* <p>
* The walker works in two phases.  The first phase is to gather and return all
* the resident INs using the roots that were specified when the SLTW was
* constructed.  For each child of each root, if the child is resident it is
* passed to the callback method (processLSN).  If the child was not in memory,
* it is added to a list of LSNs to read.  When all of the in-memory INs have
* been passed to the callback for all LSNs collected, phase 1 is complete.
* <p>
* In phase 2, for each of the sorted LSNs, the target is fetched, the type
* determined, and the LSN and type passed to the callback method for
* processing.  LSNs of the children of those nodes are retrieved and the
* process repeated until there are no more nodes to be fetched for this
* database's tree.  LSNs are accumulated in batches in this phase so that
* memory consumption is not excessive.  For instance, if batches were not used
* then the LSNs of all of the BINs would need to be held in memory.
public class SortedLSNTreeWalker {

     * The interface for calling back to the user with each LSN.
    /*
     * Callback contract between the walker and its user.  The walker invokes
     * processLSN from callProcessLSNHandleExceptions and
     * processDirtyDeletedLN from processDirtyLN.
     */
    public interface TreeNodeProcessor {
        /*
         * Called with a child's LSN, its log entry type, the node and a key.
         * theNode may be null: processResidentChild builds its arguments
         * for a null node using LOG_INS_LN as a stand-in type.  In
         * accumulateLSNs, lnKey is the slot key when the child is not
         * resident or is an LN, and null otherwise.
         */
        void processLSN(long childLSN,
                        LogEntryType childType,
                        Node theNode,
                        byte[] lnKey)
            throws FileNotFoundException, DatabaseException;

        /*
         * Used for processing dirty (unlogged) deferred write LNs. [#15365]
         * Called only for resident LNs whose slot is pending-deleted or
         * known-deleted and for which ln.isDirty() is true.
         */
        void processDirtyDeletedLN(long childLSN, LN ln, byte[] lnKey)
            throws DatabaseException;

        /* Called when the internal memory limit is exceeded. */
        void noteMemoryExceeded();

     * Optionally passed to the SortedLSNTreeWalker to be called when an
     * exception occurs.
    /*
     * Consulted by fetchLSNHandleExceptions and
     * callProcessLSNHandleExceptions for each FileNotFoundException or
     * DatabaseException they catch.  A null predicate means no exception is
     * ignored.
     */
    public interface ExceptionPredicate {
        /*
         * Return true if the exception can be ignored.  An ignored exception
         * is neither saved nor rethrown by the walker.
         */
        boolean ignoreException(Exception e);

    /* Databases whose trees are walked; parallel to rootLsns. */
    protected final DatabaseImpl[] dbImpls;
    /*
     * Environment of dbImpls[0]; the constructor rejects any database whose
     * environment is a different object.
     */
    protected final EnvironmentImpl envImpl;

     * Save the root LSN at construction time, because the root may be
     * nulled out before walk() executes.
    private final long[] rootLsns;

     * Whether to call DatabaseImpl.finishedINListHarvest().
    private final boolean setDbState;

    /*
     * The limit on memory to be used for internal structures during SLTW.
     * Defaults to Long.MAX_VALUE, i.e. effectively unlimited.
     */
    private long internalMemoryLimit = Long.MAX_VALUE;

    /*
     * The current memory usage by internal SLTW structures.  Adjusted only
     * through incInternalMemoryUsage.
     */
    private long internalMemoryUsage;

    /* Receives every LSN/node the walker reports; see TreeNodeProcessor. */
    private final TreeNodeProcessor callback;

     * If true, then walker should fetch LNs and pass them to the
     * TreeNodeProcessor callback method.  Even if true, dup LNs are not
     * fetched because they are normally never used (see accumulateDupLNs).
    protected boolean accumulateLNs = false;

     * If true, fetch LNs in a dup DB.  Since LNs in a dup DB are not used by
     * cursor operations, fetching dup LNs should only be needed in very
     * exceptional situations.  Currently this field is never set to true.
    protected boolean accumulateDupLNs = false;

     * If non-null, save any exceptions encountered while traversing nodes into
     * this savedException list, in order to walk as much of the tree as
     * possible. The caller of the tree walker will handle the exceptions.
    private final List<DatabaseException> savedExceptions;

    /* May be null, in which case no exception is ignored. */
    private final ExceptionPredicate excPredicate;

     * The batch size of LSNs which will be sorted.
    private long lsnBatchSize = Long.MAX_VALUE;

    /*
     * Holder for returning LN key from fetchLSN.  A single instance is
     * reused for every fetch, so its contents are only meaningful until the
     * next fetchLSN call.
     */
    private final DatabaseEntry lnKeyEntry = new DatabaseEntry();

     * This map provides an LSN to IN/index. When an LSN is processed by the
     * tree walker, the map is used to lookup the parent IN and child entry
     * index of each LSN processed by the tree walker.  Since fetchLSN is
     * called with an arbitrary LSN, and since when we fetch (for preload) we
     * need to setup the parent to refer to the node which we are prefetching,
     * we need to have the parent in hand at the time of the call to fetchLSN.
     * This map allows us to keep a reference to that parent so that we can
     * call fetchNode on that parent.
     * It is also necessary to maintain this map for cases other than preload()
     * so that during multi-db walks (i.e. multi db preload), we can associate
     * an arbitrary LSN back to the parent IN and therefore connect a fetch'ed
     * Node into the proper place in the tree.
     * LSN -> INEntry
    /*
     * struct to hold IN/entry-index pair: the parent IN and the slot index in
     * that parent whose child LSN is waiting to be fetched.  Instances are the
     * values of lsnINMap.
     */
    public static class INEntry {
        final IN in;
        final int index;

        /* Both the IN and its database must be non-null (assert-checked). */
        INEntry(IN in, int index) {
            assert in != null;
            assert in.getDatabase() != null;
            /*
             * NOTE(review): the assignment target on the next line is missing
             * in this copy of the file; presumably "this.in" -- confirm
             * against the upstream source.
             */
   = in;
            this.index = index;

        /*
         * NOTE(review): the SizeofMarker parameter is unused; this looks like
         * a constructor kept only for memory-size measurement -- confirm.
         */
        public INEntry(@SuppressWarnings("unused") SizeofMarker marker) {
            /* NOTE(review): assignment target missing; presumably "this.in". */
   = null;
            this.index = 0;

        /* A plain INEntry carries no delta; DeltaINEntry overrides this. */
        BINDelta getDelta() {
            return null;

        /* A plain INEntry has no delta LSN; DeltaINEntry overrides this. */
        long getDeltaLsn() {
            return DbLsn.NULL_LSN;

        /*
         * Memory charged for this entry.  fetchLSN subtracts this amount from
         * the walker's internal memory usage when the entry is removed from
         * lsnINMap.
         * NOTE(review): the return expression is cut off after "+" in this
         * copy; the remaining operand(s) are unknown from here.
         */
        long getMemorySize() {
            return MemoryBudget.HASHMAP_ENTRY_OVERHEAD +

     * Supplements INEntry with BINDelta information.  When a BINDelta is
     * encountered during the fetching process, we cannot immediately place it
     * in the tree.  Instead we queue a DeltaINEntry for fetching the full BIN,
     * in LSN order as usual.  When the full BIN is fetched, the DeltaINEntry
     * is used to apply the delta and place the result in the tree.
    /*
     * INEntry variant queued by fetchLSN when the fetched log entry is a
     * BINDeltaLogEntry: it is keyed by the full BIN's LSN and remembers the
     * delta and the LSN the delta was read from.
     */
    public static class DeltaINEntry extends INEntry {
        private final BINDelta delta;
        private final long deltaLsn;

        /* delta must be non-null and deltaLsn non-NULL_LSN (assert-checked). */
        DeltaINEntry(IN in, int index, BINDelta delta, long deltaLsn) {
            super(in, index);
            assert (delta != null);
            assert (deltaLsn != DbLsn.NULL_LSN);
            /*
             * NOTE(review): assignment target missing in this copy;
             * presumably "this.delta" -- confirm against upstream.
             */
   = delta;
            this.deltaLsn = deltaLsn;

        /*
         * NOTE(review): no super(...) call is visible here although INEntry
         * declares no no-arg constructor; a "super(marker)" line was probably
         * lost in extraction -- confirm.
         */
        public DeltaINEntry(SizeofMarker marker) {
            /* NOTE(review): assignment target missing; presumably "this.delta". */
   = null;
            this.deltaLsn = 0;

        /* Returns the delta to apply once the full BIN has been fetched. */
        BINDelta getDelta() {
            return delta;

        /* Returns the LSN the delta was read from. */
        long getDeltaLsn() {
            return deltaLsn;

        /*
         * Memory charged for this entry.
         * NOTE(review): the return expression is cut off after the second
         * "+" in this copy; the final operand is unknown from here.
         */
        long getMemorySize() {
            return MemoryBudget.HASHMAP_ENTRY_OVERHEAD +
                   MemoryBudget.DELTAINENTRY_OVERHEAD +

    /*
     * Child LSN -> parent IN/slot.  Entries are put by addEntryToLsnMap and
     * removed by fetchLSN when the LSN is fetched.
     */
    private final Map<Long, INEntry> lsnINMap = new HashMap<Long, INEntry>();

     * @param dbImpls an array of DatabaseImpls which should be walked over
     * in disk order.  This array must be parallel to the rootLsns array in
     * that rootLsns[i] must be the root LSN for dbImpls[i].
     * @param setDbState if true, indicate when the INList harvest has
     * completed for a particular DatabaseImpl.
     * @param rootLsns is passed in addition to the dbImpls, because the
     * root may be nulled out on the dbImpl before walk() is called.
     * @param callback the callback instance
     * @param savedExceptions a List of DatabaseExceptions encountered during
     * the tree walk.
     * @param excPredicate a predicate to determine whether a given exception
     * should be ignored.
    /*
     * Validates the database array and stores the arguments.  Throws an
     * unexpected-state EnvironmentFailureException for a null/empty array or
     * a database with a null environment, and IllegalArgumentException when
     * the databases do not all share the environment of dbImpls[0].
     * NOTE(review): rootLsns is stored with no visible null or length check,
     * although walkInternal indexes it in parallel with dbImpls.
     */
    public SortedLSNTreeWalker(DatabaseImpl[] dbImpls,
                               boolean setDbState,
                               long[] rootLsns,
                               TreeNodeProcessor callback,
                               List<DatabaseException> savedExceptions,
                               ExceptionPredicate excPredicate)
        throws DatabaseException {

        if (dbImpls == null || dbImpls.length < 1) {
            /* NOTE(review): the message is cut off after "+" in this copy. */
            throw EnvironmentFailureException.unexpectedState
                ("DatabaseImpls array is null or 0-length for " +

        this.dbImpls = dbImpls;
        this.envImpl = dbImpls[0].getDbEnvironment();
        /* Make sure all databases are from the same environment. */
        for (DatabaseImpl di : dbImpls) {
            EnvironmentImpl ei = di.getDbEnvironment();
            if (ei == null) {
                /* NOTE(review): the message is cut off after "+" in this copy. */
                throw EnvironmentFailureException.unexpectedState
                    ("environmentImpl is null for target db " +

            /* Identity comparison: the same EnvironmentImpl object is required. */
            if (ei != this.envImpl) {
                throw new IllegalArgumentException
                    ("Environment.preload() must be called with Databases " +
                     "which are all in the same Environment. (" +
                     di.getDebugName() + ")");

        this.setDbState = setDbState;
        this.rootLsns = rootLsns;
        this.callback = callback;
        this.savedExceptions = savedExceptions;
        this.excPredicate = excPredicate;

    /*
     * Sets the batch threshold that accumulateLSNs compares against
     * pendingLSNs.getNTotalEntries().  The default is Long.MAX_VALUE.
     */
    void setLSNBatchSize(long lsnBatchSize) {
        this.lsnBatchSize = lsnBatchSize;

    /*
     * Sets the limit that accumulateLSNs compares internalMemoryUsage
     * against.  The default is Long.MAX_VALUE.
     */
    void setInternalMemoryLimit(long internalMemoryLimit) {
        this.internalMemoryLimit = internalMemoryLimit;

    /*
     * Adjusts the internal memory usage counter.  The increment may be
     * negative: fetchLSN passes a negated entry size when it removes an
     * entry from lsnINMap.
     */
    void incInternalMemoryUsage(long increment) {
        internalMemoryUsage += increment;

     * Find all non-resident nodes, and execute the callback.  The root IN's
     * LSN is not returned to the callback.
    /*
     * Public entry point of the walk.
     * NOTE(review): no body statement survives in this copy of the file;
     * presumably this delegates to walkInternal() -- confirm against the
     * upstream source.
     */
    public void walk()
        throws DatabaseException {


    /*
     * Runs the walk: seeds one accumulator from every database root, in
     * array order, pairing dbImpls[i] with rootLsns[i].
     */
    protected void walkInternal()
        throws DatabaseException {

         * Phase 1: seed the SLTW with all of the roots of the DatabaseImpl[].
         * For each root, look for all in-memory child nodes and process them
         * (i.e. invoke the callback on those LSNs).  For child nodes which are
         * not in-memory (i.e. they are LSNs only and no Node references),
         * accumulate their LSNs to be later sorted and processed during phase
         * 2.
        LSNAccumulator pendingLSNs = new LSNAccumulator(this);
        for (int i = 0; i < dbImpls.length; i += 1) {
            processRootLSN(dbImpls[i], pendingLSNs, rootLsns[i]);

        /*
         * NOTE(review): the statement that performs phase 2 is missing after
         * the description below; presumably a call to
         * processAccumulatedLSNs(pendingLSNs) -- confirm against upstream.
         */
         * Phase 2: Sort and process any LSNs we've gathered so far. For each
         * LSN, fetch the target record and process it as in Phase 1 (i.e.
         * in-memory children get passed to the callback, not in-memory children
         * have their LSN accumulated for later sorting, fetching, and
         * processing.

     * Process a batch of LSNs by sorting and fetching each of them.
    /*
     * Drains the accumulator in rounds.  Each round takes the sorted pending
     * LSNs, swaps in a fresh accumulator, and fetches every LSN of the round;
     * fetching may add new LSNs to the fresh accumulator, which form the next
     * round.  The loop ends when a round adds nothing.
     */
    protected void processAccumulatedLSNs(LSNAccumulator pendingLSNs) {
        long[] currentLSNs = null;
        while (!pendingLSNs.isEmpty()) {
            currentLSNs = pendingLSNs.getAndSortPendingLSNs();
            /*
             * Rebinds the local parameter only; the accumulator object the
             * caller passed in is not the one filled by later rounds.
             */
            pendingLSNs = new LSNAccumulator(this);
            for (long lsn : currentLSNs) {
                fetchAndProcessLSN(lsn, pendingLSNs);

     * Retrieve the root for the given DatabaseImpl and then process its
     * children.
    /*
     * Obtains the root IN for one database and, if there is one, walks its
     * children via accumulateLSNs.  A null root (nothing resident and no
     * root LSN) is skipped.
     */
    private void processRootLSN(DatabaseImpl dbImpl,
                                LSNAccumulator pendingLSNs,
                                long rootLsn) {
        IN root = getOrFetchRootIN(dbImpl, rootLsn);
        if (root != null) {
            try {
                accumulateLSNs(root, pendingLSNs);
            /*
             * NOTE(review): the finally body is missing in this copy;
             * presumably it releases the root via releaseRootIN(root) --
             * confirm.
             */
            } finally {

        /*
         * NOTE(review): the body of this if is missing; per the setDbState
         * field description it presumably calls
         * dbImpl.finishedINListHarvest() -- confirm.
         */
        if (setDbState) {

    /*
     * Visits every slot of the given IN.  Deleted slots go to processDirtyLN;
     * non-resident children with a real LSN are registered in lsnINMap for a
     * later sorted fetch (when accumulation is enabled for this IN); every
     * other slot goes to processResidentChild, which may recurse back here.
     */
    protected void accumulateLSNs(IN in, LSNAccumulator pendingLSNs)
        throws DatabaseException {

        boolean accumulateChildren = true;

         * If this is the bottom of the tree and we're not accumulating LNs,
         * then there's no need to accumulate any more LSNs, but we still need
         * to callback with each of them.
        DatabaseImpl db = in.getDatabase();
        boolean dups = db.getSortedDuplicates();

         * Without dups, all BINs contain only LN children.  With dups, it
         * depends on the dup format.  Preload works with the old dup format
         * and the new.
         * In the new dup format (or after dup conversion), BINs contain only
         * LNs and no DBINs exist.  In the old dup format, DBINs contain only
         * LN children, but BINs may contain a mix of LNs and DINs.
        boolean allChildrenAreLNs;
        if (!dups || db.getDupsConverted()) {
            allChildrenAreLNs = in.isBIN();
        } else {
            /*
             * dups is necessarily true on this branch, so "!dups ||" is
             * always false here and the test reduces to
             * in.isBIN() && in.containsDuplicates().
             */
            allChildrenAreLNs =
                in.isBIN() && (!dups || in.containsDuplicates());

         * If LNs are not needed, there is no need to accumulate the child LSNs
         * when all children are LNs.
        if (allChildrenAreLNs) {
            accumulateChildren = dups ? accumulateDupLNs : accumulateLNs;

         * Process all children, but only accumulate LSNs for children that are
         * not in memory.
        for (int i = 0; i < in.getNEntries(); i += 1) {

            long lsn = in.getLsn(i);
            Node node = in.getTarget(i);
            boolean nodeInCache = node != null;
            /* The key is only passed along for LNs and non-resident children. */
            byte[] lnKey =
                (!nodeInCache || node.isLN()) ? in.getKey(i) : null;

            if (in.isEntryPendingDeleted(i) ||
                in.isEntryKnownDeleted(i)) {

                /* Dirty LNs (deferred write) get special treatment. */
                processDirtyLN(node, lsn, lnKey);
                /* continue; */

            } else if (accumulateChildren &&
                       !nodeInCache &&
                       lsn != DbLsn.NULL_LSN) {

                /*
                 * Out of cache node, handle the LSN.
                 * NOTE(review): only the map registration is visible here;
                 * a statement adding lsn to pendingLSNs was probably lost in
                 * extraction -- confirm.
                 */
                addToLsnINMap(lsn, in, i);
                /* callback.processLSN is called when we fetch this LSN. */

                 * If we've exceeded the batch size then process the current
                 * batch and start a new one.
                boolean internalMemoryExceeded =
                    internalMemoryUsage > internalMemoryLimit;
                if (pendingLSNs.getNTotalEntries() > lsnBatchSize ||
                    internalMemoryExceeded) {
                    /*
                     * NOTE(review): the bodies of both ifs are missing in
                     * this copy.  Presumably the inner one calls
                     * callback.noteMemoryExceeded() and the outer one
                     * processes and resets the pending batch -- confirm.
                     */
                    if (internalMemoryExceeded) {

            } else {

                /*
                 * Child is in cache.  This branch is also reached with a
                 * null node when accumulation is disabled for this IN or the
                 * slot's LSN is NULL_LSN; processResidentChild handles a
                 * null node.
                 */
                processResidentChild(lsn, node, lnKey, pendingLSNs);

    /*
     * Reports a deleted slot's LN to the callback, but only when the node is
     * resident, is an LN, and is dirty.  Any other deleted slot is silently
     * skipped.
     */
    private void processDirtyLN(Node node, long lsn, byte[] lnKey) {
        if (node != null && node.isLN()) {
            LN ln = (LN) node;
            if (ln.isDirty()) {
                callback.processDirtyDeletedLN(lsn, ln, lnKey);

    /*
     * Handles a slot that is not queued for fetching: reports it to the
     * callback and, when the node is a resident IN, walks that IN's children
     * via accumulateLSNs.  node may be null (see the callers' conditions).
     */
    protected void processResidentChild(long lsn,
                                        Node node,
                                        byte[] lnKey,
                                        LSNAccumulator pendingLSNs) {

        boolean nodeInCache = (node != null);

         * If the child is resident, use that log type, else we can assume it's
         * an LN.
        /*
         * NOTE(review): the callee name and the ternary's else-operand are
         * missing from the next three lines.  Presumably the call is
         * callProcessLSNHandleExceptions(...) and the else-operand is the
         * resident node's own log type -- confirm against upstream.
         */
            (lsn, (!nodeInCache ?
                   LogEntryType.LOG_INS_LN /* Any LN type will do */ :
             node, lnKey);
        if (nodeInCache && node.isIN()) {
            IN nodeAsIN = (IN) node;
            try {
                accumulateLSNs(nodeAsIN, pendingLSNs);
            /*
             * NOTE(review): the finally body is missing in this copy;
             * what it releases cannot be determined from here.
             */
            } finally {

     * Fetch the node at 'lsn' and callback to let the invoker process it.  If
     * it is an IN, accumulate LSNs for it.
    /*
     * Fetches the node logged at lsn through fetchLSNHandleExceptions, then
     * reports it and, for an IN, walks its children via accumulateLSNs.
     * The shared lnKeyEntry field is passed as the key holder.
     */
    private void fetchAndProcessLSN(long lsn, LSNAccumulator pendingLSNs)
        throws DatabaseException {

        Node node = fetchLSNHandleExceptions(lsn, lnKeyEntry, pendingLSNs);
        /*
         * NOTE(review): the body of this if is missing; presumably an early
         * "return", since node is dereferenced below -- confirm.
         */
        if (node == null) {
        boolean isIN = (node instanceof IN);
        IN in = null;
        try {
            if (isIN) {
                in = (IN) node;
                /*
                 * NOTE(review): at least one statement after the cast and
                 * the callee name of the call below are missing.  Presumably
                 * the call is callProcessLSNHandleExceptions(...) -- confirm.
                 */
                (lsn, node.getGenericLogType(), node, lnKeyEntry.getData());

            if (isIN) {
                accumulateLSNs(in, pendingLSNs);
        } finally {
            /* NOTE(review): the body of this if is missing in this copy. */
            if (isIN) {

    /*
     * Wraps fetchLSN with the walker's exception policy.  Returns the fetched
     * node, or null when fetchLSN returned null or an exception was ignored
     * or saved.  An exception the predicate does not ignore is rethrown
     * when savedExceptions is null.
     */
    private Node fetchLSNHandleExceptions(long lsn,
                                          DatabaseEntry lnKeyEntry,
                                          LSNAccumulator pendingLSNs) {

        DatabaseException dbe = null;

        try {
            return fetchLSN(lsn, lnKeyEntry, pendingLSNs);
        } catch (FileNotFoundException e) {
            if (excPredicate == null ||
                !excPredicate.ignoreException(e)) {
                /*
                 * A missing log file is converted to an
                 * EnvironmentFailureException with reason LOG_FILE_NOT_FOUND.
                 * NOTE(review): the opening of the argument list is missing
                 * in this copy.
                 */
                dbe = new EnvironmentFailureException
                     EnvironmentFailureReason.LOG_FILE_NOT_FOUND, e);
        } catch (DatabaseException e) {
            if (excPredicate == null ||
                !excPredicate.ignoreException(e)) {
                dbe = e;

        if (dbe != null) {
            /*
             * NOTE(review): the statement of the first branch is missing;
             * presumably savedExceptions.add(dbe) -- confirm.
             */
            if (savedExceptions != null) {

                 * This LSN fetch hit a failure. Do as much of the rest of
                 * the tree as possible.
            } else {
                throw dbe;

        return null;

    /*
     * Invokes callback.processLSN under the same exception policy as
     * fetchLSNHandleExceptions: exceptions the predicate ignores are dropped,
     * and the rest are rethrown when savedExceptions is null.
     */
    protected void callProcessLSNHandleExceptions(long childLSN,
                                                  LogEntryType childType,
                                                  Node theNode,
                                                  byte[] lnKey) {
        DatabaseException dbe = null;

        try {
            callback.processLSN(childLSN, childType, theNode, lnKey);
        } catch (FileNotFoundException e) {
            if (excPredicate == null ||
                !excPredicate.ignoreException(e)) {
                /*
                 * NOTE(review): the opening of the argument list is missing
                 * in this copy.
                 */
                dbe = new EnvironmentFailureException
                     EnvironmentFailureReason.LOG_FILE_NOT_FOUND, e);
        } catch (DatabaseException e) {
            if (excPredicate == null ||
                !excPredicate.ignoreException(e)) {
                dbe = e;

        if (dbe != null) {
            /*
             * NOTE(review): the statement of the first branch is missing;
             * presumably savedExceptions.add(dbe) -- confirm.  The text
             * below says "LSN fetch" although the failure here comes from
             * the callback.
             */
            if (savedExceptions != null) {

                 * This LSN fetch hit a failure. Do as much of the rest of
                 * the tree as possible.
            } else {
                throw dbe;

     * Returns the root IN, latched shared.  Allows subclasses to override
     * getResidentRootIN and/or getRootIN to modify behavior.
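     * getResidentRootIN is called first; if it returns null and rootLsn is
     * not NULL_LSN, getRootIN is called.  Returns null when the root is not
     * resident and rootLsn is NULL_LSN.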
    /*
     * Prefers the resident root.  Otherwise fetches by rootLsn, or returns
     * null when rootLsn is NULL_LSN.
     */
    protected IN getOrFetchRootIN(DatabaseImpl dbImpl, long rootLsn) {
        final IN root = getResidentRootIN(dbImpl);
        if (root != null) {
            return root;
        /* Nothing resident and no logged root: there is no tree to walk. */
        if (rootLsn == DbLsn.NULL_LSN) {
            return null;
        return getRootIN(dbImpl, rootLsn);

     * The default behavior fetches the rootIN from the log and latches it
     * shared. Classes extending this may fetch (and latch) the root from the
     * tree.
    /*
     * Returns the root IN for rootLsn, or null if none was obtained.
     * NOTE(review): the initializer expression after the (IN) cast is missing
     * in this copy, and statements between the null check and the final
     * return (e.g. latching) may be missing too -- confirm against upstream.
     */
    protected IN getRootIN(DatabaseImpl dbImpl, long rootLsn) {
        final IN root = (IN)
        if (root == null) {
            return null;
        return root;

     * The default behavior returns (and latches shared) the IN if it is
     * resident in the Btree, or null otherwise.  Classes extending this may
     * return (and latch) a known IN object.
    /*
     * Delegates to the database's Tree, requesting a latched root.
     * getOrFetchRootIN treats a null result as "root not resident".
     */
    protected IN getResidentRootIN(DatabaseImpl dbImpl) {
        return dbImpl.getTree().getResidentRootIN(true /*latched*/);

     * Release the latch.  Overriding this method should not be necessary.
    /*
     * NOTE(review): the body is missing in this copy of the file; presumably
     * it releases the latch held on root -- confirm against upstream.
     */
    protected void releaseRootIN(IN root) {

     * Add an LSN-IN/index entry to the map.
    /*
     * Registers a plain (non-delta) parent/slot pair for lsn.  Used by
     * accumulateLSNs for non-resident children.
     */
    private void addToLsnINMap(long lsn, IN in, int index) {
        addEntryToLsnMap(lsn, new INEntry(in, index));

     * Add an LSN-IN/index entry, along with a delta and delta LSN, to the map.
    /*
     * Registers a parent/slot pair together with a BINDelta.  fetchLSN calls
     * this with lsn set to the delta's last full BIN LSN and deltaLsn set to
     * the LSN the delta itself was read from.
     */
    private void addToLsnINMap(long lsn,
                               IN in,
                               int index,
                               BINDelta delta,
                               long deltaLsn) {
        addEntryToLsnMap(lsn, new DeltaINEntry(in, index, delta, deltaLsn));

    /*
     * Puts the entry into lsnINMap, replacing any existing entry for lsn.
     * The if is true only when lsn had no previous mapping.
     * NOTE(review): the if body is missing in this copy; presumably it
     * charges inEntry.getMemorySize() via incInternalMemoryUsage, mirroring
     * the subtraction in fetchLSN -- confirm.
     */
    private void addEntryToLsnMap(long lsn, INEntry inEntry) {
        if (lsnINMap.put(lsn, inEntry) == null) {

     * Process an LSN.  Get & remove its INEntry from the map, then fetch the
     * target at the INEntry's IN/index pair.  This method will be called in
     * sorted LSN order.
    /*
     * Fetches the log entry at lsn on behalf of the parent slot recorded in
     * lsnINMap.  Returns the resolved Node, or null when the slot is
     * known-deleted, the slot no longer refers to the expected LSN, or the
     * entry is a BIN delta (in which case the full BIN is queued instead).
     * When fetchAndInsertIntoTree() is true the node is also attached to the
     * parent slot.
     */
    protected Node fetchLSN(long lsn,
                            DatabaseEntry lnKeyEntry,
                            LSNAccumulator pendingLSNs)
        throws FileNotFoundException, DatabaseException {

        INEntry inEntry = lsnINMap.remove(lsn);
        /*
         * The entry is only assert-checked; with assertions disabled a
         * missing mapping fails with a NullPointerException on the next
         * statement.
         */
        assert (inEntry != null) : DbLsn.getNoFormatString(lsn);
        incInternalMemoryUsage(- inEntry.getMemorySize());
        /*
         * NOTE(review): the initializer is missing in this copy; presumably
         * "inEntry.in" -- confirm against upstream.
         */
        IN in =;
        boolean isLatchedAlready = in.isLatchOwnerForWrite();
        /*
         * NOTE(review): the body of this if is missing; presumably it
         * latches "in", paired with the release in the finally below --
         * confirm.
         */
        if (!isLatchedAlready) {

        try {
            int index = inEntry.index;

            /* Preload latches the root, so why do these double checks? */
            if (in.isEntryKnownDeleted(index)) {
                return null;
            /*
             * The slot must still point at what was queued: the LSN itself
             * for a plain entry, or the delta's LSN for a delta entry.
             */
            if (inEntry.getDelta() == null) {
                if (in.getLsn(index) != lsn) {
                    return null;
            } else {
                if (in.getLsn(index) != inEntry.getDeltaLsn()) {
                    return null;

            /* Fetch log entry. */
            LogEntry entry = envImpl.getLogManager().getLogEntry(lsn);

             * For a BINDeltaLogEntry, queue fetching of the full BIN and
             * combine the full BIN with the delta when it is processed later.
             * See call to reconstituteBIN below.
            if (entry instanceof BINDeltaLogEntry) {
                BINDelta delta = (BINDelta) entry.getMainItem();
                long fullLsn = delta.getLastFullLsn();
                /*
                 * NOTE(review): only the map registration is visible; a
                 * statement adding fullLsn to pendingLSNs was probably lost
                 * in extraction -- confirm.
                 */
                addToLsnINMap(fullLsn, in, index, delta, lsn);
                return null;

            /* For an LNLogEntry, call postFetchInit and get the lnKey. */
            DatabaseImpl dbImpl = in.getDatabase();
            byte[] lnKey = null;
            if (entry instanceof LNLogEntry) {
                LNLogEntry lnEntry = (LNLogEntry) entry;
                /*
                 * NOTE(review): the postFetchInit call named above is not
                 * visible, and nothing visible stores lnKey into
                 * lnKeyEntry although fetchAndProcessLSN reads
                 * lnKeyEntry.getData() -- statements appear to be missing.
                 */
                lnKey = lnEntry.getKey();

            /* Get the Node from the LogEntry. */
            Node ret = (Node) entry.getResolvedItem(dbImpl);

             * For an IN Node, set the database so it will be passed down to
             * nested fetches.
            long lastLoggedLsn = lsn;
            if (ret instanceof IN) {
                /*
                 * NOTE(review): the statement that uses retIn is missing in
                 * this copy; presumably it sets the database on the IN --
                 * confirm.
                 */
                IN retIn = (IN) ret;

             * If there is a delta, then this is a BIN to which the delta must
             * be applied.  The delta LSN is the last logged LSN.
            BINDelta delta = inEntry.getDelta();
            if (delta != null) {
                assert lsn == delta.getLastFullLsn();
                assert ret instanceof BIN;
                delta.reconstituteBIN(dbImpl, (BIN) ret);
                lastLoggedLsn = inEntry.getDeltaLsn();

            /* During a preload, finally place the Node into the Tree. */
            if (fetchAndInsertIntoTree()) {
                ret.postFetchInit(dbImpl, lastLoggedLsn);
                in.updateNode(index, ret, lnKey);
            return ret;
        } finally {
            /*
             * NOTE(review): the body of this if is missing; presumably it
             * releases the latch taken above -- confirm.
             */
            if (!isLatchedAlready) {

     * Overridden by subclasses if fetch of an LSN should result in insertion
     * into tree rather than just instantiating the target.
    /*
     * Hook read by fetchLSN.  When it returns true, the fetched node is
     * initialized with postFetchInit and attached to its parent slot with
     * updateNode.  The default is false.
     */
    protected boolean fetchAndInsertIntoTree() {
        return false;

    /*
     * Returns the list passed to the constructor (the same object, not a
     * copy); null if the walker was constructed without one.
     */
    public List<DatabaseException> getSavedExceptions() {
        return savedExceptions;

