/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 2002, 2011 Oracle and/or its affiliates. All rights reserved.
*
*/
package com.sleepycat.je.rep.vlsn;
import static com.sleepycat.je.utilint.VLSN.NULL_VLSN;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Level;
import java.util.logging.Logger;
import com.sleepycat.bind.tuple.LongBinding;
import com.sleepycat.je.Cursor;
import com.sleepycat.je.CursorConfig;
import com.sleepycat.je.Database;
import com.sleepycat.je.DatabaseConfig;
import com.sleepycat.je.DatabaseEntry;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.DbInternal;
import com.sleepycat.je.Durability;
import com.sleepycat.je.Environment;
import com.sleepycat.je.EnvironmentFailureException;
import com.sleepycat.je.LockMode;
import com.sleepycat.je.OperationStatus;
import com.sleepycat.je.StatsConfig;
import com.sleepycat.je.TransactionConfig;
import com.sleepycat.je.dbi.DatabaseImpl;
import com.sleepycat.je.dbi.DbTree;
import com.sleepycat.je.dbi.DbType;
import com.sleepycat.je.dbi.EnvironmentImpl;
import com.sleepycat.je.log.LogEntryType;
import com.sleepycat.je.log.LogItem;
import com.sleepycat.je.recovery.RecoveryInfo;
import com.sleepycat.je.rep.impl.RepParams;
import com.sleepycat.je.rep.impl.node.NameIdPair;
import com.sleepycat.je.rep.vlsn.VLSNRange.VLSNRangeBinding;
import com.sleepycat.je.txn.BasicLocker;
import com.sleepycat.je.txn.Locker;
import com.sleepycat.je.txn.Txn;
import com.sleepycat.je.utilint.DbLsn;
import com.sleepycat.je.utilint.LoggerUtils;
import com.sleepycat.je.utilint.LongStat;
import com.sleepycat.je.utilint.StatGroup;
import com.sleepycat.je.utilint.TestHook;
import com.sleepycat.je.utilint.TestHookExecute;
import com.sleepycat.je.utilint.VLSN;
/**
* A VLSN (Virtual LSN) is used to identify every log entry shared between
* members of the replication group. Since a JE log is identified by LSNs, we
* must have a way to map VLSN->LSNs in order to fetch a replicated log record
* from the local log, using the VLSN. The VLSNIndex implements those
* mappings. The VLSNIndex has these responsibilities:
*
* Generating new VLSNs.
* Only masters need to generate VLSNs, but any node may have the potential
* to be a master. The VLSN sequence must ascend over time and across
 * recoveries, so the VLSN id must be preserved much like the database, node
* and txn ids.
* Maintaining the VLSN range.
* Although each node needs to receive and store each log entry from the
* replication stream, over time the part of the stream that is stored can be
* reduced, either by log cleaning, or by syncups which can truncate the
* replication stream. A node always holds a contiguous portion of the
* replication stream. The VLSN range identifies that portion by having the
* start and end VLSNs, as well as key landmarks such as the lastSync-able
* log entry and the last commit log entry. VLSN range information is used by
* elections and syncup.
* Gatekeeper for waiting for the most recently logged entries.
* Feeders block upon the VLSNIndex when they are trying to fetch the most
* recently logged entries. These recent log entries are held in a two level
* cache within the VLSNIndex. A call to VLSNIndex.waitForLsn() goes through
* this sequence:
* 1) check the log item stored in the vlsn wait latch, if the call did wait.
* 2) check the log item cache
* If both fail, the FeederReader will fetch the required log entry from log
* buffers or disk
* Providing the LSN mapping for a log record identified by its VLSN.
* The Feeders and the syncup protocol both need to retrieve log records
* by VLSN. To do that, we need an LSN mapping.
*
* Mappings are added to VLSNIndex when replicated log entries are written into
* the local log. Although all mappings are registered, the VLSNIndex does not
* keep every one, in order to save on disk and in-memory storage. Only a
* sparse set is kept. When searching for a log entry by VLSN, the caller uses
* the closest available mapping and then scans the log looking for that entry.
*
* The VLSNIndex relies on the assumption that VLSN tagged log entries are
* ordered and contiguous in the log. That is, the LSN for VLSN 1 is < the LSN
* for VLSN 2 < LSN for VLSN 3, and there is never a gap in the VLSNs. However,
* at node syncup, the replication stream may need to be truncated when rolling
* back a non-committed log entry. We can't literally truncate the log files
* because the JE logs contain intermingled transactional and non transactional
* information. Instead, the truncation is done both logically by amending the
* VLSNIndex, and physically by overmarking those entries in the JE
* logs. Because of that, a physical dump of the log may show some VLSN tagged
* entries as duplicate and/or out of order because they're abandoned log
 * entries that are not logically part of the replication stream any more.
* For example, the log can look like this:
* LSN 100, VLSN 1
* LSN 200, VLSN 2 <- overmarked
* LSN 300, VLSN 3 <- overmarked
* --- syncup, rollback to VLSN 1, restart at VLSN 2
* LSN 400, VLSN 2
* LSN 500, VLSN 3
*
* VLSN->LSN mappings are created under the log write latch, which ensures that
* all VLSN tagged log entries are ordered in the logical replication stream in
* the log. However, the mapping is added to the VLSNIndex outside the log
* write latch, so the VLSNIndex database may have a momentary gap. For
* example,
*
* t0- thread 1 logs entry at VLSN=1, LSN=100, within log write latch
* t1- thread 2 logs entry at VLSN=2, LSN=150, within log write latch
* t2- thread 3 logs entry at VLSN=3, LSN=200, within log write latch
* t3- thread 1 calls VLSNIndex.put(VLSN=1/LSN=100)
* t4- thread 3 calls VLSNIndex.put(VLSN=3/LSN=200)
* t5- thread 2 calls VLSNIndex.put(VLSN=2/LSN=150)
*
* At t4, the VLSNIndex contains 1/100, 3/200, but not 2/150. However, we know
* that the VLSNIndex always represents a contiguous range of VLSNs, so the
 * fact that 2/150 is not yet present is handled, and is just like the case
 * where
* the VLSNIndex optimized away the mapping in order to keep the index sparse.
*
* We do guarantee that the start and end VLSNs in the range have mappings, in
* order to always be able to provide a LTE and GTE mapping for all valid
* VLSNs. Because of that, if a VLSN comes out of order, it does not update the
* range.
*
* Persistent storage:
*
* The VLSN->LSN mappings in the range are grouped into instances of
* com.sleepycat.je.util.VLSNBucket. Each bucket knows the first and last VLSN
* within its mini-range. We observe these invariants
* - buckets are ordered by VLSN in the database and the bucket cache,
* - only the last bucket is the target of updates at any time,
* - a single bucket corresponds to a single file, but a single file may
* have multiple buckets covering it.
*
* While it would be nice to also guarantee that there are no gaps between
* buckets, ie:
* bucket(N-1).last == bucket(N).first - 1
 * bucket(N).last == bucket(N+1).first - 1
 * it is not possible to do so because the put() call is not serialized
* because we don't want to add overhead to the log write latch. In order
* to permit out of order puts(), and to require that only the last bucket
* is updated, we must permit gaps between buckets.
*
* Buckets are both cached in memory by the VLSNIndex and are stored
 * persistently in an internal, non-replicated database. The database holds
* key/value pairs where
*
* key = bucket.first
* data = bucket
*
* Since the first valid VLSN is 1, key = -1 is reserved for storage of the
* VLSNRange.
*
* Buckets are filled up as new VLSNs arrive (either because they've been
* generated by write operations on the master, or because they're incoming
* operations on the replica). They're flushed to disk periodically rather than
* with every new VLSN, because the update rate would have too much of a
* performance impact. Since there is this level of caching happening, we must
* be careful to write in-memory buckets to disk at well known points to
* support recoverability. The flushing must be instigated by a third party
* activity, such as checkpointing, rather than by the action of adding a new
* mapping. That's because mappings are registered by the logging system, and
* although we are not holding the log write latch at that point, it seems
* inadvisable to recursively generate another logging call on behalf of the
* flush. Currently the VLSNIndex is flushed to disk at every checkpoint. It
* can also optionally happen more often, and (TODO) we may want to do so
* because we've seen cases where checkpoints take a very long time. Perhaps we
* should flush when we flip to a new log file?
*
* Once written to disk, the buckets are generally not updated. Updates can
* happen when the range is truncated, such as for syncup rollback, but the
* system is quiescent at that time. Log cleaning can delete buckets, but will
* not modify them. The VLSNRange does naturally change often, and that data
* record does get updated.
*
* Recovery:
*
* The VLSN database is restored at recovery time just as all other databases
* are. However, there may be a portion of the VLSN range that was not flushed
* to disk. At recovery, we piggyback onto the log scanning done and re-track
 * any mappings found within the recovery range. Those mappings are merged
* into those stored on disk, so that the VLSNIndex correctly reflects the
* entire replication stream at startup. For example, suppose a log has:
*
* LSN
* 100 firstActiveLSN
* 200 Checkpoint start
* 300 VLSN 78
* 400 VLSNIndex flushed here
* 500 Checkpoint end
* 600 VLSN 79
*
* The VLSNIndex is initially populated with the version of the index found
* at LSN 400. That doesn't include VLSN 79. A tracking pass is done from
* checkpoint start -> end of log, which sweeps up VLSN 78 and VLSN 79 into
* a temporary tracker. That tracker is merged in the VLSNIndex, to update
* its mappings to VLSN 79.
*
* Note that the checkpoint VLSNIndex must encompass all vlsn mappings that are
* prior to the checkpoint start of that recovery period. This follows the
* general philosophy that checkpoint flushes all metadata, and recovery reads
 * from checkpoint start onwards to add on any needed extra data.
* Retrieving mappings:
*
* Callers who need to retrieve mappings obtain a VLSNScanner, which acts as a
* cursor over the VLSNIndex. A VLSNScanner finds and saves the applicable
* VLSNBucket, and queries the bucket directly as long as it can provide
* mappings. This reduces the level of contention between multiple readers
* (feeders) and writers (application threads, or the replay thread)
*
* Synchronization hierarchy:
*
 * To write a new mapping, you must have the mutex on the VLSNIndex, and then
* the tracker, which lets you obtain the correct bucket, and then you must
* have a mutex on the bucket. To read a mapping, you must have the tracker
* mutex to obtain the right bucket. If you already have the right bucket in
* hand, you only need the bucket mutex.
*
* In truth, buckets which are not the "currentBucket" are not modified again,
* so a future optimization would allow for reading a mapping on a finished
* bucket without synchronization.
*
* The VLSNRange is updated as an atomic assignment to a volatile field after
* taking the mutex on the current bucket. It is read without a mutex, by
* looking at it as a volatile field.
*
* The hierarchy is
* VLSNIndex -> VLSNTracker -> VLSNBucket
* VLSNIndex -> VLSNTracker -> VLSNRange
* VLSNIndex -> VLSNIndex.mappingSynchronizer
* VLSNIndex.flushSynchronizer -> VLSNTracker
*
* Removing mappings vs reading mappings - sync on the range.
*
* We also need to consider that fact that callers of the VLSNIndex may be
* holding other mutex, or IN latches, and that the VLSNIndex methods may do
* database operations to read or write to the internal VLSN database. That can
* result in a nested database operation, and we need to be careful to avoid
 * deadlocks. To be safe, we disable critical eviction [#18475] in
 * VLSNBucket.writeDatabase().
*
* Writers
* -------
* Allocating a new VLSN: bump()
* - sync on log write latch
 * Note that since there is no synchronization on the VLSNIndex itself,
* [allocating new VLSN, logging its entry] and [flushing the vlsn index
* to disk] is not atomic. See awaitConsistency().
*
* Adding a mapping: put()
* - sync on VLSNIndex
* -sync on VLSNTracker to access the right bucket, and possibly
* create a new bucket. Atomically modify the VLSNRange.
*
* Flushing mappings to disk: writeToDatabase()
 * - sync on VLSNIndex.flushSynchronizer -> VLSNTracker
*
* Replica side syncup truncates the VLSNIndex from the end:
* - no synchronization needed, the system is quiescent, and we can assume
* that VLSNs are neither read nor written by other threads.
*
* Log cleaning truncates the VLSNIndex from the beginning:
* We assume that the log cleaner is prohibited from deleting files that are
* being used for current feeding. We can also assume that the end of the
 * log is not being deleted, and that we're not in conflict with put(). We do
* have to worry about conflicting with backwards scans when executing
* syncup as a feeder, and with flushing mappings to disk. Shall we
* disable log file deletion at this point?
*
* Steps to take:
*
* First change the VLSNRange:
* - sync on VLSNIndex
* - atomically modify the VLSNRange to ensure that no readers or
* writers touch the buckets that will be deleted.
* - sync on VLSNTracker to delete any dead buckets. Do that before
* updating the on-disk database, so that we don't lose any
* buckets to writeToDatabase().
* - without synchronization, scan the database and non-transactionally
* delete any on-disk buckets that are <= the log cleaned file.
*
* Readers
* -------
* Active forward feeder checks if a mapping exists, and waits if necessary
* - read the current VLSNRange w/out a mutex. If not satisfactory
* - sync on VLSNIndex
* - sync on VLSNIndex.mappingSynchronizer
*
* Active forward feeder reads a mapping:
* first - getBucket()
* - sync on VLSNTracker to access the right bucket
* if bucket is in hand
* - sync on target bucket to read bucket
*/
public class VLSNIndex {
/*
 * The length of time, in milliseconds, that a checkpoint will wait for
 * the vlsn index to contain all vlsn->lsn mappings before the checkpoint
 * start.
 */
public static final int AWAIT_CONSISTENCY_MS = 60000;
/* Owning environment; used for logging, txn creation and config access. */
private final EnvironmentImpl envImpl;
/*
 * VLSN waiting: A Feeder may block waiting for the next available record
 * in the replication stream.
 * vlsnPutLatch - Latch used to wait for the next VLSN put operation.
 * putWaitVLSN - The VLSN associated with the vlsnPutLatch, it's only
 * meaningful in the presence of a latch.
 * Both fields are read and written while synchronized on
 * mappingSynchronizer (see put() and waitForVLSN()).
 */
private VLSNAwaitLatch vlsnPutLatch = null;
private VLSN putWaitVLSN = null;
/*
 * Consider replacing the mapping synchronizer with a lower overhead and
 * multi-processor friendly CAS style nowait code sequence.
 */
private final Object mappingSynchronizer = new Object();
/* Serializes flushes of the index to the mapping database. */
private final Object flushSynchronizer = new Object();
private final Logger logger;
/*
 * nextVLSNCounter is incremented under the log write latch, when used on
 * the master. If this node transitions from replica to master, this
 * counter must be initialized (initAsMaster) before write operations
 * begin.
 */
private AtomicLong nextVLSNCounter;
/*
 * For storing the persistent version of the VLSNIndex. For keys > 0,
 * the key is the VLSN sequence number, data = VLSNBucket. Key = -1 has
 * a special data item, which is the VLSNRange.
 */
private DatabaseImpl mappingDbImpl;
/*
 * The tracker handles the real mechanics of maintaining the VLSN range
 * and mappings.
 */
private VLSNTracker tracker;
/*
 * A wait-free cache of the most recent log items in the VLSN index. These
 * items are important since they are the ones needed by the feeders that
 * are responsible for supplying timely commit acknowledgments.
 */
private final LogItemCache logItemCache;
/*
 * Statistics associated with the VLSN index.
 */
private final StatGroup statistics;
private final LongStat nHeadBucketsDeleted;
private final LongStat nTailBucketsDeleted;
/* For testing [#20726] flushToDatabase while getGTEBucket is executing */
private TestHook<?> searchGTEHook;
/**
 * Creates the VLSNIndex, initializing statistics, the tracker and the
 * log item cache.
 *
 * The mapping db's name is passed in as a parameter instead of the more
 * intuitive approach of defining it within the class to facilitate unit
 * testing of the VLSNIndex.
 *
 * @param envImpl the owning environment
 * @param mappingDbName name of the internal mapping database
 * @param nameIdPair node identity; currently unused here
 * @param vlsnStride stride between retained VLSN->LSN mappings
 * @param vlsnMaxMappings max mappings per bucket
 * @param vlsnMaxDistance max LSN distance covered by one bucket
 * @param recoveryInfo recovery-time mappings to merge into the index
 * @throws DatabaseException if the mapping database cannot be opened
 */
public VLSNIndex(EnvironmentImpl envImpl,
String mappingDbName,
@SuppressWarnings("unused")
NameIdPair nameIdPair,
int vlsnStride,
int vlsnMaxMappings,
int vlsnMaxDistance,
RecoveryInfo recoveryInfo)
throws DatabaseException {
this.envImpl = envImpl;
/*
 * initialize the logger early so it can be used by the following
 * methods.
 */
logger = LoggerUtils.getLogger(getClass());
statistics = new StatGroup(VLSNIndexStatDefinition.GROUP_NAME,
VLSNIndexStatDefinition.GROUP_DESC);
nHeadBucketsDeleted =
new LongStat(statistics,
VLSNIndexStatDefinition.N_HEAD_BUCKETS_DELETED);
nTailBucketsDeleted =
new LongStat(statistics,
VLSNIndexStatDefinition.N_TAIL_BUCKETS_DELETED);
init(mappingDbName,
vlsnStride,
vlsnMaxMappings,
vlsnMaxDistance,
recoveryInfo);
/* The cache is sized from configuration; created after init(). */
logItemCache = new LogItemCache(envImpl.getConfigManager().
getInt(RepParams.VLSN_LOG_CACHE_SIZE),
statistics);
}
/**
 * Initialize before this node begins working as a master. This node may
 * become a Master directly after recovery, or it may transition to the
 * master state after running for some time as a Replica.
 * <p>
 * Resets the VLSN counter so the VLSN sequence corresponds to what this
 * node thinks is the next VLSN.
 */
public void initAsMaster() {
    final VLSN lastInRange = tracker.getRange().getLast();

    if (!lastInRange.equals(NULL_VLSN)) {
        /* Continue the sequence after the last known VLSN. */
        nextVLSNCounter = new AtomicLong(lastInRange.getSequence());
        return;
    }

    /*
     * Empty range. If the master does the conversion, the started VLSN
     * should start from 2 (counter seeded with 1) so that a Replica
     * would throw a LogRefreshRequiredException and do a NetworkRestore
     * to copy the master logs.
     */
    nextVLSNCounter = envImpl.needRepConvert() ?
        new AtomicLong(1) :
        new AtomicLong(0);
}
/*
 * Return the VLSN to use for tagging the next replicated log entry. Must
 * be called within the log write latch.
 */
public VLSN bump() {
    final long nextSequence = nextVLSNCounter.incrementAndGet();
    return new VLSN(nextSequence);
}
/*
 * Restore the VLSN counter if the target log entry couldn't be logged.
 * Must be called within the log write latch. If decrement is called, the
 * i/o to log the entry failed, and put() was not called, so there is no
 * need to update the range or bucket.
 */
public void decrement() {
    /* Return value is deliberately ignored. */
    nextVLSNCounter.getAndDecrement();
}
/*
 * Register a new VLSN->LSN mapping. This is called outside the log write
 * latch, but within the LogManager log() call. It must not cause any
 * logging of its own and should not cause I/O.
 */
public void put(LogItem logItem) {
final VLSN vlsn = logItem.getHeader().getVLSN();
final long lsn = logItem.getNewLsn();
final byte entryType = logItem.getHeader().getType();
/* Cache first, so a feeder released below can find the item. */
logItemCache.put(vlsn, logItem);
/* Lock order: this -> mappingSynchronizer (see class synchronization
 * hierarchy comment). */
synchronized (this) {
tracker.track(vlsn, lsn, entryType);
synchronized (mappingSynchronizer) {
/*
 * Put() calls may come out of order, so free the wait latch if
 * the incoming VLSN >= the waiting VLSN. For example, a feeder
 * may be awaiting VLSN 100, but the call to put(101) comes in
 * before the call to put(100).
 */
if ((vlsnPutLatch != null) &&
vlsn.compareTo(putWaitVLSN) >= 0) {
vlsnPutLatch.setLogItem(logItem);
vlsnPutLatch.countDown();
vlsnPutLatch = null;
putWaitVLSN = null;
}
}
}
if (logger.isLoggable(Level.FINEST)) {
LoggerUtils.finest(logger, envImpl, "vlsnIndex put " + vlsn);
}
}
/**
 * Wait for the vlsn, or a higher numbered vlsn, to make its appearance in
 * the VLSN index.
 *
 * @param vlsn the VLSN to wait for
 * @param waitTime maximum time to wait, in milliseconds
 *
 * @throws InterruptedException
 * @throws WaitTimeOutException if the VLSN did not appear within waitTime
 * or the latch was explicitly terminated.
 *
 * @return the LogItem associated with the vlsn, or null if the entry is
 * now present in the log, but is not available in the LogItemCache.
 */
public LogItem waitForVLSN(VLSN vlsn, int waitTime)
throws InterruptedException, WaitTimeOutException {
/* First check the volatile range field, without synchronizing. */
VLSNRange useRange = tracker.getRange();
if (useRange.getLast().compareTo(vlsn) >= 0) {
return logItemCache.get(vlsn);
}
VLSNAwaitLatch waitLatch = null;
synchronized (this) {
/* Re-check under the mutex; the VLSN may have just arrived. */
useRange = tracker.getRange();
if (useRange.getLast().compareTo(vlsn) >= 0) {
return logItemCache.get(vlsn);
}
synchronized (mappingSynchronizer) {
/* The target VLSN hasn't arrived yet, we'll wait. */
setupWait(vlsn);
/* Copy the latch while synchronized. */
waitLatch = vlsnPutLatch;
}
}
/*
 * Do any waiting outside the synchronization section. If the
 * waited-for VLSN has already arrived, the waitLatch will have been
 * counted down, and we'll go through.
 */
if (!waitLatch.await(waitTime, TimeUnit.MILLISECONDS) ||
waitLatch.isTerminated()) {
/* Timed out waiting for an incoming VLSN, or was terminated. */
throw new WaitTimeOutException();
}
/* Sanity check: the awaited VLSN must now be within the range. */
if (! (tracker.getRange().getLast().compareTo(vlsn) >= 0)) {
throw EnvironmentFailureException.
unexpectedState(envImpl, "Waited for vlsn:" + vlsn +
" should be greater than last in range:" +
tracker.getRange().getLast());
}
LogItem logItem = waitLatch.getLogItem();
/* If we waited successfully, logItem can't be null. */
return logItem.getHeader().getVLSN().equals(vlsn) ?
logItem :
/*
 * An out-of-order vlsn put, that is, a later VLSN arrived at
 * the index before this one. We could look for it in the log
 * item cache, but due to the very nature of the out of order
 * put it's unlikely to be there and we would rather not incur
 * the overhead of a failed lookup.
 */
null;
}
/**
 * For unit test only.
 *
 * @return the VLSN currently associated with the put wait latch, or null
 * if no waiter is registered.
 */
synchronized VLSN getPutWaitVLSN() {
return putWaitVLSN;
}
/**
 * Setup the context for waiting for a not-yet-registered VLSN.
 * Callers hold the mutex on "this" and on mappingSynchronizer (see
 * waitForVLSN).
 *
 * @param vlsn the VLSN the caller intends to wait for
 * @throws EnvironmentFailureException if a different VLSN is already
 * being waited on; only one wait target can be outstanding at a time.
 */
private void setupWait(VLSN vlsn) {
    if (vlsnPutLatch == null) {
        putWaitVLSN = vlsn;
        vlsnPutLatch = new VLSNAwaitLatch();
    } else {
        /* There can only be one possible VLSN to wait on. */
        if (!vlsn.equals(putWaitVLSN)) {
            /*
             * Bug fix: insert separating text so the message does not
             * run the two VLSN values together.
             */
            throw EnvironmentFailureException.unexpectedState
                (envImpl, "unexpected get for VLSN: " + vlsn +
                 ", already waiting for VLSN: " + putWaitVLSN);
        }
    }
}
/**
 * Remove all information from the VLSNIndex for VLSNs <= deleteEnd.
 * Used by log cleaning. To properly coordinate with readers of the
 * VLSNIndex, we need to update the range before updating the buckets.
 *
 * We assume that deleteEnd is always the last vlsn in a file, and because
 * of that, truncations will never split a bucket.
 *
 * A truncation may leave a gap at the head of the vlsn index though.
 * This could occur if the buckets have a gap, due to out of order VLSNs.
 * For example, it's possible that the index has these buckets:
 *
 * bucket A: firstVLSN = 10, lastVLSN = 20
 * bucket B: firstVLSN = 22, lastVLSN = 30
 *
 * If we truncate the index at 20 (deleteEnd == 20), then the resulting
 * start of the range is 21, but the first bucket value is 22. In this
 * case, we need to insert a ghost bucket.
 *
 * This method ensures that any changes are fsynced to disk before file
 * deletion occurs. [#20702]
 *
 * @param deleteEnd last VLSN to remove; NULL_VLSN means nothing to do
 * @param deleteFileNum file number being deleted by the cleaner
 * @throws DatabaseException
 */
public synchronized void truncateFromHead(VLSN deleteEnd,
long deleteFileNum)
throws DatabaseException {
LoggerUtils.fine(logger, envImpl,
"head truncate called with " + deleteEnd +
" delete file#:" + deleteFileNum);
/* Drop cached log items; they may refer to the truncated span. */
logItemCache.clear();
if (deleteEnd.equals(VLSN.NULL_VLSN)) {
return;
}
/*
 * Check the VLSN found in the deleted file against the existing
 * mappings. The range should not be empty, and doing the truncation
 * should not remove the last sync point. We assume that once this
 * environment has received any portion of the replication stream, it
 * will maintain a minimum set of VLSNs.
 */
VLSNRange currentRange = tracker.getRange();
if (currentRange.getFirst().compareTo(deleteEnd) > 0) {
/* deleteEnd has already been cast out of the index. */
return;
}
if (currentRange.isEmpty()) {
throw EnvironmentFailureException.unexpectedState
(envImpl, "Didn't expect current range to be empty. " +
" End of delete range = " + deleteEnd);
}
if (!currentRange.getLastSync().equals(NULL_VLSN) &&
(deleteEnd.compareTo(currentRange.getLastSync()) > 0)) {
throw EnvironmentFailureException.unexpectedState
(envImpl, "Can't log clean away last matchpoint. DeleteEnd= " +
deleteEnd + " lastSync=" + currentRange.getLastSync());
}
/*
 * Now that the sanity checks are over, update the in-memory, cached
 * portion of the index. Since the range is the gatekeeper, update
 * the tracker cache before the database, so that the range is
 * adjusted first.
 */
tracker.truncateFromHead(deleteEnd, deleteFileNum);
/*
 * Be sure that the changes are fsynced before deleting any files. The
 * changed vlsn index must be persisted so that there are no references
 * to the deleted, cleaned files. Instead of using COMMIT_SYNC, use
 * COMMIT_NO_SYNC with an explicit environment flush and fsync, because
 * the latter ends the txn and releases locks sooner, and reduces
 * possible lock contention on the VLSNIndex. Both feeders and write
 * operations need to lock the VLSNIndex, so keeping lock contention
 * minimal is essential.
 * [#20702]
 */
TransactionConfig config = new TransactionConfig();
config.setDurability(Durability.COMMIT_NO_SYNC);
Txn txn = Txn.createLocalTxn(envImpl, config);
boolean success = false;
try {
/* flushSynchronizer keeps concurrent flushes from racing us. */
synchronized (flushSynchronizer) {
pruneDatabaseHead(deleteEnd, deleteFileNum, txn);
flushToDatabase(txn);
}
txn.commit();
envImpl.flushLog(true /*fsync required*/);
success = true;
} finally {
if (!success) {
txn.abort();
}
}
}
/**
 * Remove all information from the VLSNIndex for VLSNs >= deleteStart.
 * Used by replica side syncup, when the log is truncated. Assumes that
 * the vlsnIndex is quiescent.
 *
 * @param deleteStart first VLSN to remove
 * @param lastLsn LSN of the last entry that remains in the log
 * @throws DatabaseException
 */
public synchronized void truncateFromTail(VLSN deleteStart, long lastLsn)
    throws DatabaseException {

    /* Cached log items may belong to the truncated span; drop them. */
    logItemCache.clear();

    final VLSNRange range = tracker.getRange();
    if (range.getLast().getNext().equals(deleteStart)) {
        /*
         * deleteStart directly follows what's in this range, no need to
         * delete anything.
         */
        return;
    }

    tracker.truncateFromTail(deleteStart, lastLsn);

    /*
     * Be sure to commit synchronously so that changes to the vlsn index
     * are persisted before the log is truncated. There are no feeders or
     * repstream write operations at this time, so the use of COMMIT_SYNC
     * does not introduce any lock contention. [#20702]
     */
    final TransactionConfig txnConfig = new TransactionConfig();
    txnConfig.setDurability(Durability.COMMIT_SYNC);

    final Txn localTxn = Txn.createLocalTxn(envImpl, txnConfig);
    boolean committed = false;
    try {
        pruneDatabaseTail(deleteStart, lastLsn, localTxn);
        flushToDatabase(localTxn);
        localTxn.commit();
        committed = true;
    } finally {
        if (!committed) {
            localTxn.abort();
        }
    }
}
/**
 * Returns the current VLSNRange as one consistent snapshot.
 *
 * All range points (first, last, etc) ought to be seen as one consistent
 * group. Because of that, VLSNIndex doesn't offer getLastVLSN,
 * getFirstVLSN type methods, to discourage the possibility of retrieving
 * range points across two different range sets.
 */
public VLSNRange getRange() {
return tracker.getRange();
}
/**
 * Returns the statistics associated with the VLSNIndex.
 *
 * @param config controls whether the stats are cleared after the copy
 * @return the vlsn statistics.
 */
public StatGroup getStats(StatsConfig config) {
return statistics.cloneGroup(config.getClear());
}
/**
 * Return the nearest file number <= the log file that houses this VLSN.
 * This method is meant to be efficient and will not incur I/O. If no
 * exact answer is available, it does an approximation. The requested
 * VLSN must be within the VLSNIndex range.
 *
 * @throws DatabaseException
 */
public long getLTEFileNumber(VLSN vlsn)
    throws DatabaseException {
    return getLTEBucket(vlsn).getLTEFileNumber();
}
/**
 * Get the vlsnBucket that owns this VLSN. If there is no such bucket, get
 * the bucket that follows this VLSN. Must always return a bucket.
 *
 * @param currentBucketInUse is used only for debugging, to add to the
 * error message if the GTEBucketFromDatabase fails.
 * @throws DatabaseException
 */
VLSNBucket getGTEBucket(VLSN vlsn, VLSNBucket currentBucketInUse)
    throws DatabaseException {

    /* Prefer the in-memory tracker; fall back to the database. */
    final VLSNBucket cached = tracker.getGTEBucket(vlsn);
    return (cached != null) ?
        cached :
        getGTEBucketFromDatabase(vlsn, currentBucketInUse);
}
/**
 * Get the vlsnBucket that owns this VLSN. If there is no such bucket, get
 * the bucket that precedes this VLSN. Must always return a bucket.
 *
 * @throws DatabaseException
 */
private VLSNBucket getLTEBucket(VLSN vlsn)
    throws DatabaseException {

    /* Prefer the in-memory tracker; fall back to the database. */
    final VLSNBucket cached = tracker.getLTEBucket(vlsn);
    return (cached == null) ? getLTEBucketFromDatabase(vlsn) : cached;
}
/**
 * @return true if the status and key value indicate that this
 * cursor is pointing at a valid bucket. Recall that the VLSNRange is
 * stored in the same database at entry -1, and must be skipped.
 */
private boolean isValidBucket(OperationStatus status,
                              DatabaseEntry key) {
    if (status != OperationStatus.SUCCESS) {
        return false;
    }
    /* Key -1 holds the VLSNRange, not a bucket. */
    return LongBinding.entryToLong(key) != VLSNRange.RANGE_KEY;
}
/*
 * Get the bucket that matches this VLSN. If this vlsn is Y, then we want
 * the bucket at key X where X <= Y. If this method is called, we
 * guarantee that a non-null bucket will be returned; failure to find one
 * is an unexpected-state error.
 */
public VLSNBucket getLTEBucketFromDatabase(VLSN vlsn)
    throws DatabaseException {

    Cursor cursor = null;
    Locker locker = null;
    DatabaseEntry key = new DatabaseEntry();
    DatabaseEntry data = new DatabaseEntry();
    try {
        locker = BasicLocker.createBasicLocker(envImpl);
        cursor = makeCursor(locker);
        if (positionBeforeOrEqual(cursor, vlsn, key, data)) {
            return VLSNBucket.readFromDatabase(data);
        }
        /*
         * Shouldn't get here. Bug fix: add the missing space before
         * "in database" so the message reads correctly.
         */
        throw EnvironmentFailureException.unexpectedState
            (envImpl, "Couldn't find bucket for LTE VLSN " + vlsn +
             " in database. tracker=" + tracker);
    } finally {
        /* Release cursor before ending the locker. */
        if (cursor != null) {
            cursor.close();
        }
        if (locker != null) {
            locker.operationEnd(true);
        }
    }
}
/**
* Return the bucket that holds a mapping >= this VLSN. If this method is
* called, we guarantee that a non-null bucket will be returned.
*
* At this point, we are sure that the target vlsn is within the range of
* vlsns held in the database. However, note that there is no explicit
* synchronization between this database search, and the
* VLSNTracker.flushToDatabase, which might be writing additional buckets
* to this database. This may affect the cases when the cursor search
* does not return a equality match on a bucket. [#20726]
*
* For example, suppose the database looks like this:
* key=vlsn 10, data = bucket: vlsn 10 -> lsn 0x10/100
* vlsn 15 -> lsn 0x10/150
* key=vlsn 20, data = bucket: vlsn 20 -> lsn 0x11/100
* vlsn 25 -> lsn 0x11/150
* If we are looking for a bucket for vlsn 22, there will not be a match
* from the call to cursor.getSearchKeyRange(key=22). The code that
* accounts for that will need to consider that new buckets may be flushed
* to the database while the search for a new bucket is going on. For
* example,
*
* key=vlsn 30, data = bucket: vlsn 30 -> lsn 0x12/100
* vlsn 35 -> lsn 0x12/150
*
* may be written to the database while we are searching for a bucket that
* owns vlsn 22.
*/
private VLSNBucket getGTEBucketFromDatabase(VLSN target,
VLSNBucket currentBucketInUse)
throws DatabaseException {
Cursor cursor = null;
Locker locker = null;
try {
locker = BasicLocker.createBasicLocker(envImpl);
cursor = makeCursor(locker);
/*
* Look at the bucket at key >= target. Will return null if no GTE
* bucket.
*/
VLSNBucket bucket = examineGTEBucket(target, cursor);
if (bucket != null) {
return bucket;
}
/*
* We're here because we did not find a bucket >= target. Let's
* examine the last bucket in this database. We know that it will
* either be:
*
* 1) a bucket that's < target, but owns the mapping
* 2) if the index was appended to by VLSNTracker.flushToDatabase
* while the search is going on, the last bucket may be one
* that is > or >= target.
* Using the example above, the last bucket could be case 1:
*
* a bucket that is < target 22:
* key=vlsn 20, data = bucket: vlsn 20 -> lsn 0x11/100
* vlsn 25 -> lsn 0x11/150
*
* or case 2, a bucket that is >= target 22, because the index grew
* key=vlsn 30, data = bucket: vlsn 30 -> lsn 0x12/100
* vlsn 35 -> lsn 0x12/150
*/
/* Unit-test hook set via setGTEHook(). */
assert(TestHookExecute.doHookIfSet(searchGTEHook));
VLSNBucket endBucket = null;
DatabaseEntry key = new DatabaseEntry();
DatabaseEntry data = new DatabaseEntry();
OperationStatus status = cursor.getLast(key, data,
LockMode.DEFAULT);
if (isValidBucket(status, key)) {
endBucket = VLSNBucket.readFromDatabase(data);
if (endBucket.owns(target)) {
return endBucket;
}
/*
* If this end bucket is not the owner of the target VLSN, we
* expect it to be a greaterThan bucket which was inserted
* because of a concurrent VLSNTracker.flushToDatabase call
* that did not exist when we did the previous
* cursor.getKeyRangeSearch (case 2). In that case, we can
* search again for the owning bucket.
*/
if (endBucket.follows(target)) {
bucket = examineGTEBucket(target, cursor);
if (bucket != null) {
return bucket;
}
}
}
/*
* Shouldn't get here! There should have been a bucket in this
* database >= this target.
*/
/* Dump the bucket database for debugging. */
int count = 0;
StringBuilder sb = new StringBuilder();
status = cursor.getFirst(key, data, LockMode.DEFAULT);
while (status == OperationStatus.SUCCESS) {
Long keyValue = LongBinding.entryToLong(key);
sb.append("key => " + keyValue + "\n");
/* The first record in the database is the VLSNRange. */
if (count == 0) {
VLSNRange range = VLSNRange.readFromDatabase(data);
sb.append("range =>" + range + "\n");
} else {
bucket = VLSNBucket.readFromDatabase(data);
sb.append("bucket => " + bucket + "\n");
}
count++;
status = cursor.getNext(key, data, LockMode.DEFAULT);
}
LoggerUtils.severe(logger, envImpl, "VLSNIndex Dump: " +
sb.toString());
throw EnvironmentFailureException.unexpectedState
(envImpl, "Couldn't find bucket for GTE VLSN " + target +
" in database. EndBucket=" + endBucket + "currentBucket=" +
currentBucketInUse + " tracker = " + tracker);
} finally {
/* Close the cursor before ending the locker. */
if (cursor != null) {
cursor.close();
}
if (locker != null) {
locker.operationEnd(true);
}
}
}
/**
* Find a bucket that is GTE the target, and sees if that bucket is
* the owner. If it is not the owner look at the previous bucket.
* @return null if no GTE bucket was found.
*/
private VLSNBucket examineGTEBucket(VLSN target, Cursor cursor) {
    final DatabaseEntry keyEntry = new DatabaseEntry();
    final DatabaseEntry dataEntry = new DatabaseEntry();
    LongBinding.longToEntry(target.getSequence(), keyEntry);

    /* getSearchKeyRange positions at a bucket >= target, if any. */
    if (cursor.getSearchKeyRange(keyEntry, dataEntry, LockMode.DEFAULT) !=
        OperationStatus.SUCCESS) {
        /* No bucket at a key >= the target. */
        return null;
    }

    final VLSNBucket gteBucket = VLSNBucket.readFromDatabase(dataEntry);
    if (gteBucket.owns(target)) {
        return gteBucket;
    }

    /* The GTE bucket starts past the target; try its predecessor. */
    final OperationStatus prevStatus =
        cursor.getPrev(keyEntry, dataEntry, LockMode.DEFAULT);
    if (isValidBucket(prevStatus, keyEntry)) {
        final VLSNBucket prevBucket =
            VLSNBucket.readFromDatabase(dataEntry);
        if (prevBucket.owns(target)) {
            return prevBucket;
        }
    }

    /* No bucket owns the target; return the greater one. */
    return gteBucket;
}
/*
* Position this cursor at the largest value bucket which is <= the
* target VLSN.
* @return true if there is a bucket that fits this criteria,
*/
private boolean positionBeforeOrEqual(Cursor cursor,
                                      VLSN vlsn,
                                      DatabaseEntry key,
                                      DatabaseEntry data)
    throws DatabaseException {

    LongBinding.longToEntry(vlsn.getSequence(), key);

    /* getSearchKeyRange gives us a bucket >= the target, if one exists. */
    if (cursor.getSearchKeyRange(key, data, LockMode.DEFAULT) ==
        OperationStatus.SUCCESS) {

        if (VLSNBucket.readFromDatabase(data).owns(vlsn)) {
            return true;
        }

        /*
         * That bucket starts past the target; step back one record. If
         * the previous record is a real bucket, it is the largest one
         * that is <= the target; otherwise nothing qualifies.
         */
        return isValidBucket(cursor.getPrev(key, data, LockMode.DEFAULT),
                             key);
    }

    /*
     * No bucket >= the target exists, so the last bucket in the
     * database (if any) must be < the target.
     */
    return isValidBucket(cursor.getLast(key, data, LockMode.DEFAULT), key);
}
/*
* Position this cursor at the smallest value bucket which is >= the
* target VLSN.
* @return true if there is a bucket that fits this criteria,
*/
private boolean positionAfterOrEqual(Cursor cursor,
VLSN vlsn,
DatabaseEntry key,
DatabaseEntry data)
throws DatabaseException {
LongBinding.longToEntry(vlsn.getSequence(), key);
VLSNBucket bucket = null;
/* getSearchKeyRange will give us a bucket >= Y. */
OperationStatus status =
cursor.getSearchKeyRange(key, data, LockMode.DEFAULT);
if (status == OperationStatus.SUCCESS) {
bucket = VLSNBucket.readFromDatabase(data);
if (bucket.owns(vlsn)) {
return true;
}
/*
* This bucket is > our VLSN. Check the bucket before.
* - It might be a bucket that owns this VLSN
* - the prev bucket might precede this VLSN.
* - the record before might be the range.
* One way or another, there should always be a record before
* any bucket -- it's the range.
*/
status = cursor.getPrev(key, data, LockMode.DEFAULT);
assert status == OperationStatus.SUCCESS;
if (isValidBucket(status, key)) {
bucket = VLSNBucket.readFromDatabase(data);
if (bucket.owns(vlsn)) {
return true;
}
}
/*
* Move back to the original bucket, all those preceding buckets
* were unsatisfactory.
*/
/*
* NOTE(review): the getNext status is intentionally unchecked --
* the preceding getPrev succeeded, so stepping forward should land
* back on the GTE bucket. Confirm that a concurrent writer cannot
* invalidate this.
*/
status = cursor.getNext(key, data, LockMode.DEFAULT);
return true;
}
/*
* There was no bucket >= Y. Let's find the last bucket in this
* database then. It should be a bucket that's < Y.
*/
status = cursor.getLast(key, data, LockMode.DEFAULT);
if (isValidBucket(status, key)) {
bucket = VLSNBucket.readFromDatabase(data);
if (bucket.owns(vlsn)) {
return true;
}
}
return false;
}
/*
* Remove all VLSN->LSN mappings <= deleteEnd
*/
private void pruneDatabaseHead(VLSN deleteEnd,
long deleteFileNum,
Txn txn)
throws DatabaseException {
Cursor cursor = null;
try {
cursor = makeCursor(txn);
DatabaseEntry key = new DatabaseEntry();
DatabaseEntry data = new DatabaseEntry();
/* Position at the last bucket whose key is <= deleteEnd. */
if (!positionBeforeOrEqual(cursor, deleteEnd, key, data)) {
/* Nothing to do. */
return;
}
/* Delete this bucket and everything before this bucket. */
/* Avoid fetching the bucket itself, since it's not needed */
final DatabaseEntry noData = new DatabaseEntry();
noData.setPartial(0, 0, true);
int deleteCount = 0;
do {
long keyValue = LongBinding.entryToLong(key);
/* Never delete the range record, stored under RANGE_KEY. */
if (keyValue == VLSNRange.RANGE_KEY) {
break;
}
OperationStatus status = cursor.delete();
deleteCount++;
if (status != OperationStatus.SUCCESS) {
throw EnvironmentFailureException.unexpectedState
(envImpl, "Couldn't delete, got status of " + status +
"for delete of bucket " + keyValue + " deleteEnd=" +
deleteEnd);
}
} while (cursor.getPrev(key, noData, LockMode.DEFAULT) ==
OperationStatus.SUCCESS);
nHeadBucketsDeleted.add(deleteCount);
/*
* Check the first real bucket, and see if we need to insert
* a ghost bucket.
*/
VLSN newStart = deleteEnd.getNext();
/*
* Search from key 1 to skip the range record (assumes RANGE_KEY
* sorts before key 1 -- buckets are keyed by their first VLSN).
*/
LongBinding.longToEntry(1, key);
OperationStatus status =
cursor.getSearchKeyRange(key, data, LockMode.DEFAULT);
/* No real buckets, nothing to adjust. */
if (status != OperationStatus.SUCCESS) {
return;
}
VLSNBucket firstBucket = VLSNBucket.readFromDatabase(data);
/* First bucket matches the range, nothing to adjust. */
if (firstBucket.getFirst().equals(newStart)) {
return;
}
if (firstBucket.getFirst().compareTo(newStart) < 0) {
throw EnvironmentFailureException.unexpectedState
(envImpl, "newStart " + newStart +
" should be < first bucket:" + firstBucket);
}
/*
* Add a ghost bucket so that there is a bucket to match the
* first item in the range.
*/
long nextFile = envImpl.getFileManager().
getFollowingFileNum(deleteFileNum,
true /* forward */);
long lastPossibleLsn = firstBucket.getLsn(firstBucket.getFirst());
VLSNBucket placeholder =
new GhostBucket(newStart, DbLsn.makeLsn(nextFile, 0),
lastPossibleLsn);
placeholder.writeToDatabase(envImpl, cursor);
} finally {
if (cursor != null) {
cursor.close();
}
}
}
/*
* Remove all VLSN->LSN mappings >= deleteStart. Recall that the
* mappingDb is keyed by the first VLSN in the bucket. The replication
* stream will be quiescent when this is called. The caller must be
* sure that there are buckets in the database that cover deleteStart.
*
* @param lastLsn if NULL_LSN, the pruning may need to delete mappings <
* deleteStart, in order to keep the bucket capped with a legitimate
* lastLSN. If lastLsn is not NULL_LSN, then the deletion can precisely
* delete only mappings >= deleteStart.
* @param lastVLSN left on disk.
*/
private VLSN pruneDatabaseTail(VLSN deleteStart, long lastLsn, Txn txn)
    throws DatabaseException {

    VLSN lastOnDiskVLSN = deleteStart.getPrev();
    Cursor cursor = null;
    try {
        cursor = makeCursor(txn);
        DatabaseEntry key = new DatabaseEntry();
        DatabaseEntry data = new DatabaseEntry();

        /* Position at the first bucket at or after deleteStart. */
        if (!positionAfterOrEqual(cursor, deleteStart, key, data)) {
            return tracker.getLastOnDisk();
        }

        /*
         * Does this bucket straddle deleteStart? Then prune off part of
         * the bucket and rewrite it in place.
         */
        VLSNBucket bucket = VLSNBucket.readFromDatabase(data);
        if (bucket.getFirst().compareTo(deleteStart) < 0) {
            bucket.removeFromTail(deleteStart, lastLsn);
            lastOnDiskVLSN = bucket.getLast();
            bucket.fillDataEntry(data);
            OperationStatus status = cursor.putCurrent(data);
            if (status != OperationStatus.SUCCESS) {
                throw EnvironmentFailureException.unexpectedState
                    (envImpl, "Couldn't update " + bucket);
            }

            /* Advance to the first wholly-deletable bucket. */
            status = cursor.getNext(key, data, LockMode.DEFAULT);
            if (status != OperationStatus.SUCCESS) {
                return lastOnDiskVLSN;
            }
        }

        /* Delete everything after this bucket. */
        /* Avoid fetching the bucket itself, since it's not needed */
        final DatabaseEntry noData = new DatabaseEntry();
        noData.setPartial(0, 0, true);
        int deleteCount = 0;
        do {
            OperationStatus status = cursor.delete();
            if (status != OperationStatus.SUCCESS) {
                throw EnvironmentFailureException.unexpectedState
                    (envImpl, "Couldn't delete after vlsn " + deleteStart +
                     " status=" + status);
            }

            /*
             * Count each deleted bucket. Previously deleteCount was
             * never incremented, so nTailBucketsDeleted always
             * recorded 0 (compare pruneDatabaseHead, which counts).
             */
            deleteCount++;
        } while (cursor.getNext(key, noData, LockMode.DEFAULT) ==
                 OperationStatus.SUCCESS);
        nTailBucketsDeleted.add(deleteCount);
    } finally {
        if (cursor != null) {
            cursor.close();
        }
    }

    return lastOnDiskVLSN;
}
/**
* At startup, we need to
* - get a handle onto the internal database which stores the VLSN index
* - read the latest on-disk version to initialize the tracker
* - find any VLSN->LSN mappings which were not saved in the on-disk
* version, and merge them in. These mappings weren't flushed because
* they occurred after the checkpoint end. They're found by the recovery
* procedure, and are added in now.
*
* This method will execute when the map is quiescent, and needs no
* synchronization.
*/
private void init(String mappingDbName,
int vlsnStride,
int vlsnMaxMappings,
int vlsnMaxDistance,
RecoveryInfo recoveryInfo)
throws DatabaseException {
openMappingDatabase(mappingDbName);
/* The tracker is initialized from the on-disk mapping database. */
tracker = new VLSNTracker(envImpl, mappingDbImpl, vlsnStride,
vlsnMaxMappings, vlsnMaxDistance,
statistics);
/*
* Put any in-memory mappings discovered during the recovery process
* into the fileMapperDb. That way, we'll preserve mappings that
* precede this recovery's checkpoint.
*
* For example, suppose the log looks like this:
*
* VLSN1
* VLSN2
* checkpoint start for this recovery, for the instantiation of the
* replicator
* checkpoint end for this recovery
* <- at this point in time, after the env comes up, we'll create
* the VLSN index. VLSN1 and VLSN2 were discovered during recovery and
* are recorded in memory. Normally a checkpoint flushes the VLSNIndex
* but the VLSNIndex isn't instantiated yet, because the VLSNIndex
* needs an initialized environment.
*/
merge((VLSNRecoveryTracker) recoveryInfo.vlsnProxy);
}
/*
* Update this index, which was initialized with what's on disk, with
* mappings found during recovery. These mappings ought to either overlap
* what's on disk, or cover the range immediately after what's on disk. If
* it doesn't, the recovery mechanism, which flushes the mapping db at
* checkpoint is faulty and we've lost mappings.
*
* In other words, if this tracker holds the VLSN range a -> c, then the
* recovery tracker will have the VLSN range b -> d, where
*
* a <= b
* c <= d
* if c < b, then b == c+1
*
* This method must be called when the index and tracker are quiescent, and
* there are no calls to track().
*
* The recoveryTracker is the authoritative voice on what should be in the
* VLSN index.
*/
void merge(VLSNRecoveryTracker recoveryTracker) {

    if (recoveryTracker == null) {
        /* Nothing from recovery; just persist the current tracker. */
        flushToDatabase(Durability.COMMIT_SYNC);
        return;
    }

    if (recoveryTracker.isEmpty()) {
        /*
         * Even though the recovery tracker has no mappings, it may have
         * seen a rollback start that indicates that the VLSNIndex should
         * be truncated. Setup the recovery tracker so it looks like
         * it has a single mapping -- the matchpoint VLSN and LSN -- and
         * proceed. Take this approach, rather than truncating the index,
         * because we may need that matchpoint mapping to cap off the
         * VLSN range.
         *
         * For example, suppose an index has mappings for VLSN 1, 5, 10,
         * and the rollback is going to matchpoint 7. A pure truncation
         * would lop off VLSN 10, making VLSN 5 the last mapping. We
         * would then need to add on VLSN 7.
         */
        VLSN lastMatchpointVLSN = recoveryTracker.getLastMatchpointVLSN();
        if (lastMatchpointVLSN.isNull()) {
            return;
        }

        /*
         * Use a MATCHPOINT log entry to indicate that this is a syncable
         * entry. This purposefully leaves the recovery tracker's range's
         * lastTxnEnd null, so it will not overwrite the on disk
         * tracker. This assumes that we will never rollback past the
         * last txn end.
         */
        recoveryTracker.track(lastMatchpointVLSN,
                              recoveryTracker.getLastMatchpointLsn(),
                              LogEntryType.LOG_MATCHPOINT.getTypeNum());
    }

    /*
     * The mappings held in the recoveryTracker must either overlap what's
     * on disk or immediately follow the last mapping on disk. If there
     * is a gap between what is on disk and the recovery tracker, something
     * went awry with the checkpoint scheme, which flushes the VLSN index
     * at each checkpoint. We're in danger of losing some mappings. Most
     * importantly, the last txnEnd VLSN in the range might not be right.
     *
     * The one exception is when the Environment has been converted from
     * non-replicated and there are no VLSN entries in the VLSNIndex. In
     * that case, it's valid that the entries seen from the recovery
     * tracker may have a gap in VLSNs. For example, in a newly converted
     * environment, the VLSN index range has NULL_VLSN as its last entry,
     * but the first replicated log entry will start with 2.
     *
     * Note: EnvironmentImpl.needRepConvert() would more accurately convey
     * the fact that this is the very first recovery following a
     * conversion. But needRepConvert() on a replica is never true, and we
     * need to disable this check on the replica's first recovery too.
     */
    VLSN persistentLast = tracker.getRange().getLast();
    VLSN recoveryFirst = recoveryTracker.getRange().getFirst();

    /*
     * Condition simplified: the former extra "|| !isRepConverted()"
     * clause was redundant -- by absorption, (!(A && B) || !A) is
     * exactly !(A && B).
     */
    if (!(envImpl.isRepConverted() && persistentLast.isNull()) &&
        recoveryFirst.compareTo(persistentLast.getNext()) > 0) {
        throw EnvironmentFailureException.unexpectedState
            (envImpl, "recoveryTracker should overlap or follow on disk " +
             "last VLSN of " + persistentLast + " recoveryFirst= " +
             recoveryFirst);
    }

    VLSNRange currentRange = tracker.getRange();
    if (currentRange.getLast().getNext().equals(recoveryFirst)) {
        /* No overlap, just append mappings found at recovery. */
        tracker.append(recoveryTracker);
        flushToDatabase(Durability.COMMIT_SYNC);
        return;
    }

    /*
     * The mappings in the recovery tracker should overwrite those in the
     * VLSN index.
     */
    TransactionConfig config = new TransactionConfig();
    config.setDurability(Durability.COMMIT_SYNC);
    Txn txn = Txn.createLocalTxn(envImpl, config);
    boolean success = false;
    VLSN lastOnDiskVLSN;
    try {
        /* Remove on-disk mappings that recovery supersedes. */
        lastOnDiskVLSN = pruneDatabaseTail(recoveryFirst, DbLsn.NULL_LSN,
                                           txn);
        tracker.merge(lastOnDiskVLSN, recoveryTracker);
        flushToDatabase(txn);
        txn.commit();
        success = true;
    } finally {
        if (!success) {
            txn.abort();
        }
    }
}
/**
 * Opens (creating if necessary) the internal database that persists the
 * VLSN index, and caches its handle in mappingDbImpl.
 */
private void openMappingDatabase(String mappingDbName)
    throws DatabaseException {

    final Locker autoTxn =
        Txn.createLocalAutoTxn(envImpl, new TransactionConfig());
    try {
        final DbTree dbTree = envImpl.getDbTree();
        DatabaseImpl indexDb = dbTree.getDb(autoTxn,
                                            mappingDbName,
                                            null /* databaseHandle */);
        if (indexDb == null) {
            if (envImpl.isReadOnly()) {
                /* This should have been caught earlier. */
                throw EnvironmentFailureException.unexpectedState
                    ("A replicated environment can't be opened read only.");
            }
            /* The mapping db itself is never replicated. */
            final DatabaseConfig dbConfig = new DatabaseConfig();
            DbInternal.setReplicated(dbConfig, false);
            indexDb = dbTree.createInternalDb(autoTxn, mappingDbName,
                                              dbConfig);
        }
        mappingDbImpl = indexDb;
    } finally {
        autoTxn.operationEnd(true);
    }
}
/** Flushes the in-memory tracker to disk, then closes the index. */
public synchronized void close() {
close(true);
}
/** Closes the index without flushing the in-memory tracker to disk. */
public synchronized void abnormalClose() {
close(false);
}
/**
 * Closes the index, optionally flushing the in-memory tracker first,
 * and always releases the mapping database handle.
 *
 * @param doFlush if true, persist the tracker with COMMIT_SYNC before
 * closing.
 */
public void close(boolean doFlush)
throws DatabaseException {
try {
if (doFlush) {
flushToDatabase(Durability.COMMIT_SYNC);
}
if (vlsnPutLatch != null) {
/*
* This should be harmless because the feeders using the latch
* should all have been interrupted and shutdown. So just log
* this fact.
*/
vlsnPutLatch.terminate();
LoggerUtils.fine
(logger, envImpl,
"Outstanding VLSN put latch cleared at close");
}
} finally {
/* Release the db handle even if the flush failed. */
if (mappingDbImpl != null) {
envImpl.getDbTree().releaseDb(mappingDbImpl);
mappingDbImpl = null;
}
}
}
/** For unit testing. */
public DatabaseImpl getDatabaseImpl() {
/* May be null after close(), which releases and clears the handle. */
return mappingDbImpl;
}
/**
* Mappings are flushed to disk at close, and at checkpoints.
*/
public void flushToDatabase(Durability useDurability) {

    final TransactionConfig txnConfig = new TransactionConfig();
    txnConfig.setDurability(useDurability);

    /* The index db is non-replicated, so a local txn suffices. */
    final Txn flushTxn = Txn.createLocalTxn(envImpl, txnConfig);
    boolean committed = false;
    try {
        flushToDatabase(flushTxn);
        flushTxn.commit();
        committed = true;
    } finally {
        /* Don't leak the txn if the flush or commit failed. */
        if (!committed) {
            flushTxn.abort();
        }
    }
}
/**
* Mappings are flushed to disk at close, and at checkpoints.
*/
private void flushToDatabase(Txn txn)
throws DatabaseException {
/*
* flushSynchronizer coordinates with verify(), which reads the
* database under the same lock.
*/
synchronized (flushSynchronizer) {
tracker.flushToDatabase(mappingDbImpl, txn);
}
}
/**
* For debugging and unit tests
* @throws DatabaseException
*/
public Map<VLSN, Long> dumpDb(boolean display) {

    if (display) {
        System.out.println(tracker);
    }

    final Map<VLSN, Long> mappings = new HashMap<VLSN, Long>();
    Locker locker = null;
    Cursor cursor = null;
    try {
        locker = BasicLocker.createBasicLocker(envImpl);
        cursor = makeCursor(locker);

        final DatabaseEntry keyEntry = new DatabaseEntry();
        final DatabaseEntry dataEntry = new DatabaseEntry();

        /*
         * The first record in the database is the VLSNRange; every
         * record after that is a VLSNBucket.
         */
        boolean sawRange = false;
        while (cursor.getNext(keyEntry, dataEntry, LockMode.DEFAULT) ==
               OperationStatus.SUCCESS) {

            final Long keyValue = LongBinding.entryToLong(keyEntry);
            if (display) {
                System.out.println("key => " + keyValue);
            }

            if (!sawRange) {
                sawRange = true;
                final VLSNRange range =
                    VLSNRange.readFromDatabase(dataEntry);
                if (display) {
                    System.out.println("range =>");
                    System.out.println(range);
                }
                continue;
            }

            /* Expand the bucket into individual VLSN -> LSN entries. */
            final VLSNBucket bucket =
                VLSNBucket.readFromDatabase(dataEntry);
            for (long seq = bucket.getFirst().getSequence();
                 seq <= bucket.getLast().getSequence();
                 seq++) {
                final VLSN vlsn = new VLSN(seq);
                final long lsn = bucket.getLsn(vlsn);
                if (lsn != DbLsn.NULL_LSN) {
                    mappings.put(vlsn, lsn);
                }
            }
            if (display) {
                System.out.println("bucket =>");
                System.out.println(bucket);
            }
        }
    } finally {
        if (cursor != null) {
            cursor.close();
        }
        if (locker != null) {
            locker.operationEnd(true);
        }
    }
    return mappings;
}
/**
* For DbStreamVerify utility. Verify the on-disk database, disregarding
* the cached tracker.
* @throws DatabaseException
*/
@SuppressWarnings("null")
public static void verifyDb(Environment env,
PrintStream out,
boolean verbose)
throws DatabaseException {
DatabaseConfig dbConfig = new DatabaseConfig();
dbConfig.setReadOnly(true);
Database db = env.openDatabase
(null, DbType.VLSN_MAP.getInternalName(), dbConfig);
Cursor cursor = null;
try {
if (verbose) {
System.out.println("Verifying VLSN index");
}
cursor = db.openCursor(null, CursorConfig.READ_COMMITTED);
DatabaseEntry key = new DatabaseEntry();
DatabaseEntry data = new DatabaseEntry();
/*
* The first item in the database is the VLSNRange. All subsequent
* items are VLSNBuckets.
*/
int count = 0;
VLSNRange range = null;
VLSNBucket lastBucket = null;
Long lastKey = null;
VLSN firstVLSNSeen = VLSN.NULL_VLSN;
VLSN lastVLSNSeen = VLSN.NULL_VLSN;
while (cursor.getNext(key, data, null) ==
OperationStatus.SUCCESS) {
Long keyValue = LongBinding.entryToLong(key);
if (count == 0) {
/* The range record must be stored under RANGE_KEY. */
if (keyValue != VLSNRange.RANGE_KEY) {
out.println("Wrong key value for range! " + range);
}
range = VLSNRange.readFromDatabase(data);
if (verbose) {
out.println("range=>" + range);
}
} else {
VLSNBucket bucket = VLSNBucket.readFromDatabase(data);
if (verbose) {
out.print("key=> " + keyValue);
out.println(" bucket=>" + bucket);
}
/* Buckets must be strictly ordered and non-overlapping. */
if (lastBucket != null) {
if (lastBucket.getLast().compareTo(bucket.getFirst())
>= 0) {
out.println("Buckets out of order.");
out.println("Last = " + lastKey + "/" +
lastBucket);
out.println("Current = " + keyValue + "/" +
bucket);
}
}
lastBucket = bucket;
lastKey = keyValue;
/* Record the very first bucket's starting VLSN. */
if ((firstVLSNSeen != null) && firstVLSNSeen.isNull()) {
firstVLSNSeen = bucket.getFirst();
}
lastVLSNSeen = bucket.getLast();
}
count++;
}
if (count == 0) {
out.println("VLSNIndex not on disk");
return;
}
/* The range record must agree with the bucket endpoints. */
if (firstVLSNSeen.compareTo(range.getFirst()) != 0) {
out.println("First VLSN in bucket = " + firstVLSNSeen +
" and doesn't match range " + range.getFirst());
}
if (lastVLSNSeen.compareTo(range.getLast()) != 0) {
out.println("Last VLSN in bucket = " + lastVLSNSeen +
" and doesn't match range " + range.getLast());
}
} finally {
if (cursor != null) {
cursor.close();
}
db.close();
}
}
/* For unit test support. Index needs to be quiescent */
@SuppressWarnings("null")
public synchronized boolean verify(boolean verbose)
throws DatabaseException {
if (!tracker.verify(verbose)) {
return false;
}
VLSNRange dbRange = null;
ArrayList<VLSN> firstVLSN = new ArrayList<VLSN>();
ArrayList<VLSN> lastVLSN = new ArrayList<VLSN>();
final Locker locker = BasicLocker.createBasicLocker(envImpl);
Cursor cursor = null;
/*
* Synchronize so we don't try to verify while the checkpointer
* thread is calling flushToDatabase on the vlsnIndex.
*/
synchronized (flushSynchronizer) {
/*
* Read the on-disk range and buckets.
* -The tracker and the database buckets should not intersect.
* -The on-disk range should be a subset of the tracker range.
*/
try {
cursor = makeCursor(locker);
DatabaseEntry key = new DatabaseEntry();
DatabaseEntry data = new DatabaseEntry();
/*
* Read the on-disk range and all the buckets.
*/
OperationStatus status =
cursor.getFirst(key, data, LockMode.DEFAULT);
if (status == OperationStatus.SUCCESS) {
VLSNRangeBinding rangeBinding = new VLSNRangeBinding();
dbRange = rangeBinding.entryToObject(data);
/* Collect info about the buckets. */
while (cursor.getNext(key, data, LockMode.DEFAULT) ==
OperationStatus.SUCCESS) {
VLSNBucket bucket = VLSNBucket.readFromDatabase(data);
Long keyValue = LongBinding.entryToLong(key);
/* Each bucket must be keyed by its first VLSN. */
if (bucket.getFirst().getSequence() != keyValue) {
return false;
}
firstVLSN.add(bucket.getFirst());
lastVLSN.add(bucket.getLast());
}
}
} finally {
if (cursor != null) {
cursor.close();
}
locker.operationEnd(true);
}
/*
* Verify range.
*/
VLSNRange trackerRange = tracker.getRange();
if (!trackerRange.verifySubset(verbose, dbRange)) {
return false;
}
}
VLSN firstTracked = tracker.getFirstTracked();
/* The db range and the buckets need to be consistent. */
VLSN firstOnDisk = null;
VLSN lastOnDisk = null;
if (firstVLSN.size() > 0) {
/* There are buckets in the database. */
lastOnDisk = lastVLSN.get(lastVLSN.size()-1);
firstOnDisk = firstVLSN.get(0);
if (!VLSNTracker.verifyBucketBoundaries(firstVLSN, lastVLSN)) {
return false;
}
/*
* A VLSNIndex invariant is that there is always a mapping for the
* first and last VLSN in the range. However, if the log cleaner
* lops off the head of the index, leaving a bucket gap at the
* beginning of the index, we break this invariant. For example,
* suppose the index has
*
* bucketA - VLSNs 10
* no bucket, due to out of order mapping - VLSN 11, 12
* bucket B - VLSNs 13-15
*
* If the cleaner deletes VLSN 10->11, VLSN 12 will be the start
* of the range, and needs a bucket. We'll do this by adding a
* bucket placeholder.
*/
if (dbRange.getFirst().compareTo(firstOnDisk) != 0) {
dumpMsg(verbose, "Range doesn't match buckets " +
dbRange + " firstOnDisk = " + firstOnDisk);
return false;
}
/* The tracker should know what the last VLSN on disk is. */
if (!lastOnDisk.equals(tracker.getLastOnDisk())) {
dumpMsg(verbose, "lastOnDisk=" + lastOnDisk +
" tracker=" + tracker.getLastOnDisk());
return false;
}
if (!firstTracked.equals(NULL_VLSN)) {
/*
* The last bucket VLSN should precede the first tracker VLSN.
*/
if (firstTracked.compareTo(lastOnDisk.getNext()) < 0) {
dumpMsg(verbose, "lastOnDisk=" + lastOnDisk +
" firstTracked=" + firstTracked);
return false;
}
}
}
return true;
}
/** Prints the message to stdout when verbose verification is enabled. */
private void dumpMsg(boolean verbose, String msg) {
if (verbose) {
System.out.println(msg);
}
}
/*
* For unit test support only. Can only be called when replication stream
* is quiescent.
*/
public boolean isFlushedToDisk() {
/* Delegates to the tracker, which records what has been flushed. */
return tracker.isFlushedToDisk();
}
/**
* Ensure that the in-memory vlsn index encompasses all logged entries
* before it is flushed to disk. A No-Op for non-replicated systems.
*
* The problem is in the interaction of logging and VLSN
* tracking. Allocating an new VLSN and logging a replicated log entry is
* done within the log write latch, without any VLSNINdex
* synchronization. That must be done to keep the log write latch critical
* section as small as possible, and to avoid any lock hierarchy issues.
*
* The VLSNIndex is updated after the log write latch critical section. The
* VLSNIndex is flushed to disk by checkpoint, and it is assumed that this
* persistent version of the index encompasses all VLSN entries prior to
* checkpoint start. Since the logging of a new VLSN, and the flushing of
* the index are not atomic, it's possible that the checkpointer may start
* the flush of the vlsnIndex before the last vlsn's mapping is recorded
* in the index. To obey the requirement that the checkpointed vlsn index
* encompass all mappings < checkpoint start, check that the vlsn index
* is up to date before the flush.
* [#19754]
*
* awaitConsistency() works by using the same waitForVLSN() method used by
* the Feeders. WaitForVLSN asserts that all feeders are waiting on a single
* vlsn, to assure that no feeders are left in limbo, awaiting a vlsn that
* has gone by. This contract is valid for the feeders, because they wait
* for vlsns sequentially, consuming each one by one. However, this ckpter
* awaitConsistency functionality uses the nextVLSNCounter, which can
* leapfrog ahead arbitrarily, in this case:
*
* vlsn range holds 1 -> N-1
* Feeder is present, awaiting vlsn N
* thread A bumps vlsn to N and writes record under log write latch
* thread B bumps vlsn to N + 1 and writes record under log write latch
* ckpter awaits consistency, using N+1, while feeders are awaiting N
* thread A puts VLSN N outside log write latch
* thread B puts VLSN N+1 outside log write latch
*
* Because of this, the ckpter must distinguish between what it is really
* waiting on (VLSN N+1) and what it can next wait on to fulfill the
* feeder waiting contract (VLSN N)
*/
public void awaitConsistency() {
/* VLSNIndex is not initialized and in use yet, no need to wait. */
if (nextVLSNCounter == null) {
return;
}
VLSN vlsnAllocatedBeforeCkpt = null;
VLSN endOfRangePlusOne;
while (true) {
/*
* If we retry, get a fresh VLSN value if and only if the
* previously determined vlsnAllocatedBeforeCkpt was decremented
* due to a logging failure.
*/
if (vlsnAllocatedBeforeCkpt == null) {
vlsnAllocatedBeforeCkpt = new VLSN(nextVLSNCounter.get());
} else {
VLSN latestAllocated = new VLSN(nextVLSNCounter.get());
if (latestAllocated.compareTo(vlsnAllocatedBeforeCkpt) < 0) {
LoggerUtils.info(logger, envImpl,
"Reducing awaitConsistency VLSN from " +
vlsnAllocatedBeforeCkpt + " to " +
latestAllocated);
vlsnAllocatedBeforeCkpt = latestAllocated;
}
}
/*
* [#20165] Since the await is based on the nextVLSNCounter, it's
* possible that a feeder is already waiting on an earlier VLSN.
* Safeguard against that by only waiting for one more than
* the end of the range, to avoid conflict with feeders.
* See method comments.
*/
endOfRangePlusOne = tracker.getRange().getLast().getNext();
if (vlsnAllocatedBeforeCkpt.compareTo(endOfRangePlusOne) < 0) {
/*
* All vlsns allocated before the checkpoint are now in the
* range.
*/
break;
}
if (logger.isLoggable(Level.FINE)) {
LoggerUtils.fine(logger, envImpl, "awaitConsistency target=" +
endOfRangePlusOne + " allocatedBeforeCkpt=" +
vlsnAllocatedBeforeCkpt);
}
try {
waitForVLSN(endOfRangePlusOne, AWAIT_CONSISTENCY_MS);
if (endOfRangePlusOne.compareTo(vlsnAllocatedBeforeCkpt) >= 0) {
/* We reached the real target. */
break;
}
/*
* We got to the VLSN we waited for, but it's still earlier than
* vlsnAllocatedBeforeCkpt. Loop again.
*/
} catch (WaitTimeOutException e) {
/* Timed out; log severely and retry the wait. */
LoggerUtils.severe(logger, envImpl,
"Retrying for vlsn index consistency " +
" before checkpoint, awaiting vlsn " +
endOfRangePlusOne +
" with ckpt consistency target of " +
vlsnAllocatedBeforeCkpt);
} catch (InterruptedException e) {
/*
* NOTE(review): the interrupt status is not restored here;
* the loop deliberately retries the wait. Confirm that the
* checkpointer relies on this swallow-and-retry behavior.
*/
LoggerUtils.severe(logger, envImpl,
"Interrupted while awaiting vlsn index " +
"consistency before checkpoint, awaiting " +
"vlsn " + endOfRangePlusOne +
" with ckpt consistency target of " +
vlsnAllocatedBeforeCkpt + ", will retry");
}
}
}
/** For unit tests: hook fired inside getGTEBucketFromDatabase. */
void setGTEHook(TestHook<?> hook) {
searchGTEHook = hook;
}
/**
* A cursor over the VLSNIndex.
*/
private abstract static class VLSNScanner {
/* Bucket the scan is currently positioned on; may be set to null. */
VLSNBucket currentBucket;
/* Index this scanner reads from; fixed at construction. */
final VLSNIndex vlsnIndex;
/*
* This is purely for assertions. The VLSNScanner assumes that
* getStartingLsn() is called once before getLsn() is called.
*/
int startingLsnInvocations;
VLSNScanner(VLSNIndex vlsnIndex) {
this.vlsnIndex = vlsnIndex;
startingLsnInvocations = 0;
}
/** Returns the LSN at which a scan for this VLSN should begin. */
public abstract long getStartingLsn(VLSN vlsn);
/**
* @param vlsn We're requesting a LSN mapping for this vlsn
* @return If there is a mapping for this VLSN, return it, else return
* NULL_LSN. We assume that we checked that this VLSN is in the
* VLSNIndex's range.
*/
public abstract long getPreciseLsn(VLSN vlsn);
}
/**
* Assumes that VLSNs are scanned backwards. May be used by syncup to
* optimally search for matchpoints.
*/
public static class BackwardVLSNScanner extends VLSNScanner {
public BackwardVLSNScanner(VLSNIndex vlsnIndex) {
super(vlsnIndex);
}
/*
* Use the >= mapping for the requested VLSN to find the starting lsn
* to use for a scan. This can only be used on a VLSN that is known to
* be in the range.
*/
@Override
public long getStartingLsn(VLSN vlsn) {
startingLsnInvocations++;
currentBucket = vlsnIndex.getGTEBucket(vlsn, null);
return currentBucket.getGTELsn(vlsn);
}
/**
* @see VLSNScanner#getPreciseLsn
*/
@Override
public long getPreciseLsn(VLSN vlsn) {
assert startingLsnInvocations == 1 : "startingLsns() called " +
startingLsnInvocations + " times";
/*
* Ideally, we have a bucket that has the mappings for this VLSN.
* If we don't, we attempt to get the next applicable bucket.
*/
if (currentBucket != null) {
if (!currentBucket.owns(vlsn)) {
/*
* This bucket doesn't own the VLSN. Is it because (a)
* there's a gap and two buckets don't abut, or (b) because
* we walked off the end of the current bucket, and we need
* a new one? Distinguish case (a) by seeing if the current
* bucket will be needed for an upcoming VLSN.
*/
if (currentBucket.precedes(vlsn)) {
return DbLsn.NULL_LSN;
}
/*
* Case B: We've walked off the end of the current
* bucket.
*/
currentBucket = null;
}
}
/*
* We walked off the end of the currentBucket. Get a new bucket,
* finding the closest bucket that would hold this mapping.
*/
if (currentBucket == null) {
currentBucket = vlsnIndex.getLTEBucket(vlsn);
/*
* If the new bucket doesn't own this vlsn, we're in a gap
* between two buckets. Note: vlsnIndex.getLTEBucket guards
* against returning null.
*/
if (!currentBucket.owns(vlsn)) {
return DbLsn.NULL_LSN;
}
}
assert currentBucket.owns(vlsn) : "vlsn = " + vlsn +
" currentBucket=" + currentBucket;
/* We're in the right bucket. */
return currentBucket.getLsn(vlsn);
}
}
/**
 * Creates a cursor over the mapping database with critical eviction
 * disabled, as required for all VLSNIndex cursors. [#18475] An
 * improvement would be to enable eviction, and do all database
 * operations that are in a loop asynchronously.
 *
 * @param locker the locker to associate with the cursor
 * @return a new cursor; the caller is responsible for closing it
 */
private Cursor makeCursor(Locker locker) {
    final Cursor c =
        DbInternal.makeCursor(mappingDbImpl, locker, CursorConfig.DEFAULT);
    DbInternal.getCursorImpl(c).setAllowEviction(false);
    return c;
}
/**
 * Scans VLSNs in a forward direction, used by feeders.
 */
public static class ForwardVLSNScanner extends VLSNScanner {

    public ForwardVLSNScanner(VLSNIndex vlsnIndex) {
        super(vlsnIndex);
    }

    /**
     * Use the <= mapping to the requested VLSN to find the starting lsn to
     * use for a scan. This can only be used on a VLSN that is known to be
     * in the range.
     */
    @Override
    public long getStartingLsn(VLSN vlsn) {
        startingLsnInvocations++;
        currentBucket = vlsnIndex.getLTEBucket(vlsn);
        return currentBucket.getLTELsn(vlsn);
    }

    /**
     * @see VLSNScanner#getPreciseLsn
     */
    @Override
    public long getPreciseLsn(VLSN vlsn) {
        return getLsn(vlsn, false /* approximate */);
    }

    /**
     * When doing an approximate search, the target vlsn may be a
     * non-mapped vlsn within a bucket, or it may fall between two
     * different buckets. For example, given these two buckets:
     *
     *   vlsn 1  -> lsn 10
     *   vlsn 5  -> lsn 50
     *   vlsn 7  -> lsn 70
     *
     *   vlsn 20 -> lsn 120
     *   vlsn 25 -> lsn 125
     *
     * a search for vlsn 4 approximates to vlsn 1 -> lsn 10, inside the
     * same bucket, while a search for vlsn 9 approximates to
     * vlsn 7 -> lsn 70, the last mapping of an earlier bucket.
     *
     * @param vlsn We're requesting a LSN mapping for this vlsn
     * @return If there is a mapping for this VLSN, return it. If it does
     * not exist, return the nearest non-null mapping, where nearest is the
     * <= LSN. We assume that we checked that this VLSN is in the
     * VLSNIndex's range.
     */
    public long getApproximateLsn(VLSN vlsn) {
        return getLsn(vlsn, true /* approximate */);
    }

    /*
     * Common lookup path for precise and approximate searches.
     */
    private long getLsn(VLSN vlsn, boolean approximate) {
        assert startingLsnInvocations == 1 : "startingLsns() called " +
            startingLsnInvocations + " times";

        /* Retained purely to aid debugging in getGTEBucket. */
        final VLSNBucket priorBucket = currentBucket;

        /*
         * The cached bucket may no longer apply. When it does not own
         * the target there are two possibilities: (a) the target falls
         * in a gap between two buckets that don't abut, or (b) the
         * forward walk has stepped off the high end of the cached
         * bucket. Case (a) holds when the cached bucket covers VLSNs
         * later than the target, because a forward scan will still need
         * that bucket for an upcoming (larger) VLSN.
         */
        if ((currentBucket != null) && !currentBucket.owns(vlsn)) {
            if (currentBucket.follows(vlsn)) {
                /* Case (a): no bucket available for this VLSN. */
                return approximate ?
                    findPrevLsn(vlsn) : DbLsn.NULL_LSN;
            }
            /* Case (b): the cached bucket is exhausted; discard it. */
            currentBucket = null;
        }

        /*
         * No usable cached bucket; fetch the closest bucket that could
         * hold this mapping. Note: getGTEBucket guards against
         * returning null.
         */
        if (currentBucket == null) {
            currentBucket = vlsnIndex.getGTEBucket(vlsn, priorBucket);
            if (!currentBucket.owns(vlsn)) {
                /* The target lies in a gap between two buckets. */
                return approximate ? findPrevLsn(vlsn) : DbLsn.NULL_LSN;
            }
        }

        assert currentBucket.owns(vlsn) : "vlsn = " + vlsn +
            " currentBucket=" + currentBucket;

        /*
         * The right bucket is in hand; even so, it may not contain an
         * exact mapping for this VLSN, so an approximate request
         * returns the <= mapping instead.
         */
        return approximate ?
            currentBucket.getLTELsn(vlsn) : currentBucket.getLsn(vlsn);
    }

    /*
     * Find the lsn mapping that precedes the target. This assumes that
     * no bucket owns the target vlsn -- that it's a vlsn that falls
     * between buckets.
     */
    private long findPrevLsn(VLSN target) {
        final VLSNBucket prevBucket = vlsnIndex.getLTEBucket(target);
        assert !prevBucket.owns(target) : "target=" + target +
            "prevBucket=" + prevBucket + " currentBucket=" + currentBucket;
        return prevBucket.getLastLsn();
    }
}
/**
 * Associates the logItem with the latch, so that it's readily available
 * when the latch is released.
 */
public static class VLSNAwaitLatch extends CountDownLatch {

    /* The LogItem whose addition to the VLSN released the latch. */
    private LogItem logItem = null;

    /* True once terminate() has freed any waiters. */
    private boolean terminated = false;

    public VLSNAwaitLatch() {
        /* A single countDown() releases all waiters. */
        super(1);
    }

    public void setLogItem(LogItem logItem) {
        this.logItem = logItem;
    }

    /**
     * Returns the log item that caused the latch to be released. It's only
     * meaningful after the latch has been released.
     *
     * @return log item or null if the latch timed out or its wait was
     * terminated
     */
    public LogItem getLogItem() {
        return logItem;
    }

    /* Only valid after release via setLogItem; NPEs if logItem is null. */
    public long getTriggerLSN() {
        return logItem.getNewLsn();
    }

    /* Only valid after release via setLogItem; NPEs if logItem is null. */
    public VLSN getTriggerVLSN() {
        return logItem.getHeader().getVLSN();
    }

    /* Free up any waiters on this latch and shutdown. */
    public void terminate() {
        terminated = true;
        countDown();
    }

    public boolean isTerminated() {
        return terminated;
    }
}
/*
 * An exception primarily intended to implement non-local control flow
 * upon a vlsn wait latch timeout.
 */
@SuppressWarnings("serial")
public static class WaitTimeOutException extends Exception {

    /*
     * Stack traces are never consulted for this control-flow exception,
     * so skip the expensive fill. Per the Throwable contract,
     * fillInStackTrace returns a reference to this instance (not null),
     * which keeps chained calls such as e.fillInStackTrace().toString()
     * safe.
     */
    @Override
    public synchronized Throwable fillInStackTrace() {
        return this;
    }
}
}