/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 2002, 2011 Oracle and/or its affiliates. All rights reserved.
*
*/
package com.sleepycat.je.cleaner;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.logging.Level;
import com.sleepycat.je.CacheMode;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.EnvironmentFailureException;
import com.sleepycat.je.dbi.CursorImpl;
import com.sleepycat.je.dbi.DatabaseId;
import com.sleepycat.je.dbi.DatabaseImpl;
import com.sleepycat.je.dbi.DbTree;
import com.sleepycat.je.dbi.EnvironmentFailureReason;
import com.sleepycat.je.dbi.EnvironmentImpl;
import com.sleepycat.je.dbi.MemoryBudget;
import com.sleepycat.je.log.ChecksumException;
import com.sleepycat.je.log.CleanerFileReader;
import com.sleepycat.je.log.entry.LNLogEntry;
import com.sleepycat.je.tree.BIN;
import com.sleepycat.je.tree.BINDelta;
import com.sleepycat.je.tree.ChildReference;
import com.sleepycat.je.tree.IN;
import com.sleepycat.je.tree.LN;
import com.sleepycat.je.tree.SearchResult;
import com.sleepycat.je.tree.Tree;
import com.sleepycat.je.tree.TreeLocation;
import com.sleepycat.je.tree.WithRootLatched;
import com.sleepycat.je.txn.BasicLocker;
import com.sleepycat.je.txn.LockGrantType;
import com.sleepycat.je.txn.LockResult;
import com.sleepycat.je.txn.LockType;
import com.sleepycat.je.utilint.DaemonThread;
import com.sleepycat.je.utilint.DbLsn;
import com.sleepycat.je.utilint.LoggerUtils;
import com.sleepycat.je.utilint.TestHookExecute;
/**
* Reads all entries in a log file and either determines them to be obsolete or
* marks them for migration. LNs are marked for migration by setting the BIN
* entry MIGRATE flag. INs are marked for migration by setting the dirty flag.
*
* May be invoked explicitly by calling doClean, or woken up if used as a
* daemon thread.
*/
class FileProcessor extends DaemonThread {
    /**
     * The number of LN log entries after we process pending LNs.  If we do
     * this too seldom, the pending LN queue may grow large, and it isn't
     * budgeted memory.  If we process it too often, we will repeatedly request
     * a non-blocking lock for the same locked node.
     */
    private static final int PROCESS_PENDING_EVERY_N_LNS = 100;

    /**
     * Whether to prohibit BINDeltas for a BIN that is fetched by the cleaner.
     * The theory is that when fetching a BIN during cleaning we normally
     * expect that the BIN will be evicted soon, and a delta during checkpoint
     * would be wasted.  However, this does not take into account use of the
     * BIN by the application after fetching; the BIN could become hot and then
     * deltas may be profitable.  To be safe we currently allow deltas when
     * fetching.
     */
    private static final boolean PROHIBIT_DELTAS_WHEN_FETCHING = false;

    /* When true, per-run trace messages are also echoed to System.out. */
    private static final boolean DEBUG_TRACING = false;

    /*
     * Environment components used by the cleaner.  These references are
     * cleared by clearEnv so they can be GC'd after the environment closes.
     */
    private EnvironmentImpl env;
    private Cleaner cleaner;
    private FileSelector fileSelector;
    private UtilizationProfile profile;
    private UtilizationCalculator calculator;

    /* Log version for the target file. */
    private int fileLogVersion;

    /* Per Run counters. Reset before each file is processed. */
    private int nINsObsoleteThisRun = 0;
    private int nINsCleanedThisRun = 0;
    private int nINsDeadThisRun = 0;
    private int nINsMigratedThisRun = 0;
    private int nBINDeltasObsoleteThisRun = 0;
    private int nBINDeltasCleanedThisRun = 0;
    private int nBINDeltasDeadThisRun = 0;
    private int nBINDeltasMigratedThisRun = 0;
    private int nLNsObsoleteThisRun = 0;
    private int nLNsCleanedThisRun = 0;
    private int nLNsDeadThisRun = 0;
    private int nLNsLockedThisRun = 0;
    private int nLNsMigratedThisRun = 0;
    private int nLNsMarkedThisRun = 0;
    private int nLNQueueHitsThisRun = 0;
    private int nEntriesReadThisRun;
    private long nRepeatIteratorReadsThisRun;
FileProcessor(String name,
EnvironmentImpl env,
Cleaner cleaner,
UtilizationProfile profile,
UtilizationCalculator calculator,
FileSelector fileSelector) {
super(0, name, env);
this.env = env;
this.cleaner = cleaner;
this.fileSelector = fileSelector;
this.profile = profile;
this.calculator = calculator;
}
public void clearEnv() {
env = null;
cleaner = null;
fileSelector = null;
profile = null;
calculator = null;
}
    /**
     * Return the number of retries when a deadlock exception occurs.
     */
    @Override
    protected long nDeadlockRetries() {
        return cleaner.nDeadlockRetries;
    }
    /**
     * Activates the cleaner.  Is normally called when je.cleaner.byteInterval
     * bytes are written to the log.  Cleans multiple files until the
     * utilization threshold is met, without forcing.
     */
    @Override
    public void onWakeup()
        throws DatabaseException {

        doClean(true,   // invokedFromDaemon
                true,   // cleanMultipleFiles
                false); // forceCleaning
    }
/**
* Cleans selected files and returns the number of files cleaned. May be
* called by the daemon thread or programatically.
*
* @param invokedFromDaemon currently has no effect.
*
* @param cleanMultipleFiles is true to clean until we're under budget,
* or false to clean at most one file.
*
* @param forceCleaning is true to clean even if we're not under the
* utilization threshold.
*
* @return the number of files cleaned, not including files cleaned
* unsuccessfully.
*/
public synchronized int doClean(boolean invokedFromDaemon,
boolean cleanMultipleFiles,
boolean forceCleaning)
throws DatabaseException {
if (env.isClosed()) {
return 0;
}
assert env.getSyncCleanerBarrier() != null;
/*
* Get all file summaries including tracked files. Tracked files may
* be ready for cleaning if there is a large cache and many files have
* not yet been flushed and do not yet appear in the profile map.
*/
SortedMap<Long, FileSummary> fileSummaryMap =
profile.getFileSummaryMap(true /*includeTrackedFiles*/);
/* Clean until no more files are selected. */
final int nOriginalLogFiles = fileSummaryMap.size();
int nFilesCleaned = 0;
while (true) {
/* Don't clean forever. */
if (nFilesCleaned >= nOriginalLogFiles) {
LoggerUtils.logMsg(logger, env, Level.FINE,
"Maximum files cleaned for one run. " +
fileSelector);
break;
}
/* Stop if the daemon is paused or the environment is closing. */
if ((invokedFromDaemon && isPaused()) || env.isClosing()) {
break;
}
/*
* Process pending LNs periodically. Pending LNs can prevent file
* deletion. Do not call deleteSafeToDeleteFiles here, since
* cleaner threads will block while the checkpointer deletes log
* files, which can be time consuming.
*/
cleaner.processPending();
/*
* To avoid a deadlock, set protected files in calculator before
* synchronizing on FileSelector (in selectFileForCleaning below).
*/
calculator.setProtectedFiles();
/*
* Select the next file for cleaning and update the Cleaner's
* read-only file collections.
*/
final boolean needLowUtilizationSet =
cleaner.clusterResident || cleaner.clusterAll;
Long fileNum = fileSelector.selectFileForCleaning
(calculator, fileSummaryMap, forceCleaning,
needLowUtilizationSet, cleaner.maxBatchFiles);
cleaner.updateReadOnlyFileCollections();
/*
* If no file was selected above, the estimated total utilization
* is above the minUtilization threshold. However, we may need to
* read the log file to correct utilization.
*/
boolean calcUtilizationOnly = false;
if (fileNum == null) {
fileNum = fileSelector.selectFileForCorrection(calculator,
fileSummaryMap);
calcUtilizationOnly = true;
}
/*
* Stop if no file is selected for cleaning or utilization
* correction.
*/
if (fileNum == null) {
break;
}
/* Keep track of estimated and true utilization. */
final FileSummary estimatedFileSummary;
if (fileSummaryMap.containsKey(fileNum)) {
estimatedFileSummary = fileSummaryMap.get(fileNum).clone();
} else {
estimatedFileSummary = null;
}
final FileSummary trueFileSummary = new FileSummary();
/*
* Process the selected file.
*/
resetPerRunCounters();
boolean finished = false;
boolean fileDeleted = false;
final long fileNumValue = fileNum.longValue();
cleaner.nCleanerRuns.increment();
if (calcUtilizationOnly) {
cleaner.nCleanerProbeRuns.increment();
}
final long runId = cleaner.nCleanerRuns.get();
final MemoryBudget budget = env.getMemoryBudget();
try {
TestHookExecute.doHookIfSet(cleaner.fileChosenHook);
final String traceMsg =
"CleanerRun " + runId +
" on file 0x" + Long.toHexString(fileNumValue) +
" begins" +
" probe=" + calcUtilizationOnly +
" backlog=" + fileSelector.getBacklog();
LoggerUtils.logMsg(logger, env, Level.FINE, traceMsg);
if (DEBUG_TRACING) {
System.out.println("\n" + traceMsg);
}
/* Process all log entries in the file. */
if (processFile(fileNum, trueFileSummary,
calcUtilizationOnly)) {
/* File is fully processed, update stats. */
nFilesCleaned += 1;
accumulatePerRunCounters();
finished = true;
if (estimatedFileSummary != null) {
calculator.adjustUtilization(fileNum,
fileSummaryMap.lastKey(),
estimatedFileSummary,
trueFileSummary);
}
}
} catch (FileNotFoundException e) {
/*
* File was deleted. Although it is possible that the file was
* deleted externally it is much more likely that the file was
* deleted normally after being cleaned earlier. This can
* occur when tracked obsolete information is collected and
* processed after the file has been cleaned and deleted.
* Since the file does not exist, ignore the error so that the
* cleaner will continue. The tracing below will indicate that
* the file was deleted. Remove the file completely from the
* FileSelector and UtilizationProfile so that we don't
* repeatedly attempt to process it. [#15528]
*/
fileDeleted = true;
profile.removeFile(fileNum, null /*databases*/);
fileSelector.removeAllFileReferences(fileNum, budget);
} catch (IOException e) {
LoggerUtils.traceAndLogException(env, "Cleaner", "doClean", "",
e);
throw new EnvironmentFailureException
(env, EnvironmentFailureReason.LOG_INTEGRITY, e);
} catch (DatabaseException e) {
LoggerUtils.traceAndLogException(env, "Cleaner", "doClean", "",
e);
throw e;
} finally {
if (!finished && !fileDeleted && !calcUtilizationOnly) {
fileSelector.putBackFileForCleaning(fileNum);
}
String traceMsg =
"CleanerRun " + runId +
" on file 0x" + Long.toHexString(fileNumValue) +
" probe=" + calcUtilizationOnly +
" invokedFromDaemon=" + invokedFromDaemon +
" finished=" + finished +
" fileDeleted=" + fileDeleted +
" nEntriesRead=" + nEntriesReadThisRun +
" nINsObsolete=" + nINsObsoleteThisRun +
" nINsCleaned=" + nINsCleanedThisRun +
" nINsDead=" + nINsDeadThisRun +
" nINsMigrated=" + nINsMigratedThisRun +
" nBINDeltasObsolete=" + nBINDeltasObsoleteThisRun +
" nBINDeltasCleaned=" + nBINDeltasCleanedThisRun +
" nBINDeltasDead=" + nBINDeltasDeadThisRun +
" nBINDeltasMigrated=" + nBINDeltasMigratedThisRun +
" nLNsObsolete=" + nLNsObsoleteThisRun +
" nLNsCleaned=" + nLNsCleanedThisRun +
" nLNsDead=" + nLNsDeadThisRun +
" nLNsMigrated=" + nLNsMigratedThisRun +
" nLNsMarked=" + nLNsMarkedThisRun +
" nLNQueueHits=" + nLNQueueHitsThisRun +
" nLNsLocked=" + nLNsLockedThisRun;
LoggerUtils.traceAndLog(logger, env, Level.FINE, traceMsg);
if (DEBUG_TRACING) {
System.out.println("\n" + traceMsg);
}
}
/* If we should only clean one file, stop now. */
if (!cleanMultipleFiles) {
break;
}
/* Refresh file summary info for next file selection. */
fileSummaryMap =
profile.getFileSummaryMap(true /*includeTrackedFiles*/);
}
return nFilesCleaned;
}
    /**
     * Process all log entries in the given file.
     *
     * Note that we gather obsolete offsets at the beginning of the method and
     * do not check for obsolete offsets of entries that become obsolete while
     * the file is being processed.  An entry in this file can become obsolete
     * before we process it because: 1- normal application activity deletes or
     * updates the entry, 2- proactive migration migrates the entry.  However,
     * large numbers of entries also become obsolete as the result of LN
     * migration while processing the file, when lazyMigration is false.
     * Checking the TrackedFileSummary while processing the file would be
     * expensive if it has many entries, because we perform a linear search in
     * the TFS.  There is a tradeoff between the cost of the TFS lookup and its
     * benefit, which is to avoid a tree search if the entry is obsolete.  Many
     * more lookups for non-obsolete entries than obsolete entries will
     * typically be done.  Because of the high cost of the linear search,
     * especially when processing large log files, we do not check the TFS.
     * [#19626]
     *
     * @param fileNum the file being cleaned.
     *
     * @param fileSummary used to return the true utilization.
     *
     * @param calcUtilizationOnly is true if we should calculate the true
     * utilization of the file but not process the entries.
     *
     * @return false if we aborted file processing because the environment is
     * being closed.
     *
     * @throws IOException if the log file cannot be read.
     */
    private boolean processFile(Long fileNum,
                                FileSummary fileSummary,
                                boolean calcUtilizationOnly)
        throws DatabaseException, IOException {

        /* Get the current obsolete offsets for this file. */
        final PackedOffsets obsoleteOffsets =
            profile.getObsoleteDetail(fileNum, true /*logUpdate*/);
        final PackedOffsets.Iterator obsoleteIter = obsoleteOffsets.iterator();
        long nextObsolete = -1;

        /* Copy to local variables because they are mutable properties. */
        final int readBufferSize = cleaner.readBufferSize;
        final int lookAheadCacheSize =
            calcUtilizationOnly ? 0 : cleaner.lookAheadCacheSize;

        /*
         * Add the overhead of this method to the budget.  Two read buffers are
         * allocated by the file reader.  The log size of the offsets happens
         * to be the same as the memory overhead.
         */
        final int adjustMem = (2 * readBufferSize) +
                              obsoleteOffsets.getLogSize() +
                              lookAheadCacheSize;
        final MemoryBudget budget = env.getMemoryBudget();
        budget.updateAdminMemoryUsage(adjustMem);

        /* Evict after updating the budget. */
        if (Cleaner.DO_CRITICAL_EVICTION) {
            env.daemonEviction(true /*backgroundIO*/);
        }

        /*
         * We keep a look ahead cache of non-obsolete LNs.  When we lookup a
         * BIN in processLN, we also process any other LNs in that BIN that are
         * in the cache.  This can reduce the number of tree lookups.
         */
        final LookAheadCache lookAheadCache = calcUtilizationOnly ?
            null : new LookAheadCache(lookAheadCacheSize);

        /*
         * For obsolete entries we must check for pending deleted DBs.  To
         * avoid the overhead of DbTree.getDb on every entry we keep a set of
         * all DB IDs encountered and do the check once per DB at the end.
         */
        final Set<DatabaseId> checkPendingDbSet = calcUtilizationOnly ?
            null : new HashSet<DatabaseId>();

        /*
         * Use local caching to reduce DbTree.getDb overhead.  Do not call
         * releaseDb after getDb with the dbCache, since the entire dbCache
         * will be released at the end of this method.
         */
        final Map<DatabaseId, DatabaseImpl> dbCache =
            new HashMap<DatabaseId, DatabaseImpl>();
        final DbTree dbMapTree = env.getDbTree();

        /* Keep track of all database IDs encountered. */
        final Set<DatabaseId> databases = new HashSet<DatabaseId>();

        /* Create the file reader. */
        final CleanerFileReader reader = new CleanerFileReader
            (env, readBufferSize, DbLsn.makeLsn(fileNum, 0), fileNum,
             fileSummary);
        /* Validate all entries before ever deleting a file. */
        reader.setAlwaysValidateChecksum(true);

        try {
            final TreeLocation location = new TreeLocation();
            int nProcessedLNs = 0;
            while (reader.readNextEntryAllowExceptions()) {
                cleaner.nEntriesRead.increment();
                long logLsn = reader.getLastLsn();
                long fileOffset = DbLsn.getFileOffset(logLsn);
                boolean isLN = reader.isLN();
                boolean isIN = reader.isIN();
                boolean isBINDelta = reader.isBINDelta();
                boolean isDbTree = reader.isDbTree();
                boolean isObsolete = false;

                /* Maintain a set of all databases encountered. */
                final DatabaseId dbId = reader.getDatabaseId();
                DatabaseImpl db = null;
                if (dbId != null) {
                    databases.add(dbId);
                    db = dbMapTree.getDb(dbId, cleaner.lockTimeout, dbCache);

                    /*
                     * If the DB is gone, this entry is obsolete.  If delete
                     * cleanup is in progress, we will put the DB into the DB
                     * pending set further below.  This entry will be declared
                     * deleted after the delete cleanup is finished.
                     */
                    if (db == null || db.isDeleted()) {
                        isObsolete = true;
                    }
                }

                /* Remember the version of the log file. */
                if (reader.isFileHeader()) {
                    fileLogVersion = reader.getFileHeader().getLogVersion();
                }

                /* Stop if the daemon is shut down. */
                if (env.isClosing()) {
                    return false;
                }

                /* Update background reads. */
                int nReads = reader.getAndResetNReads();
                if (nReads > 0) {
                    env.updateBackgroundReads(nReads);
                }

                /* Sleep if background read/write limit was exceeded. */
                env.sleepAfterBackgroundIO();

                /*
                 * Check for a known obsolete node.  Offsets arrive in
                 * ascending order from the iterator, so advance until we
                 * reach or pass the current file offset.
                 */
                while (nextObsolete < fileOffset && obsoleteIter.hasNext()) {
                    nextObsolete = obsoleteIter.next();
                }
                if (nextObsolete == fileOffset) {
                    isObsolete = true;
                }

                /* Check for the entry type next because it is very cheap. */
                if (!isObsolete &&
                    !isLN &&
                    !isIN &&
                    !isBINDelta &&
                    !isDbTree) {
                    /* Consider all entries we do not process as obsolete. */
                    isObsolete = true;
                }

                /*
                 * SR 14583: In JE 2.0 and later we can assume that all
                 * deleted LNs are obsolete.  Either the delete committed and
                 * the BIN parent is marked with a pending deleted bit, or the
                 * delete rolled back, in which case there is no reference
                 * to this entry.  JE 1.7.1 and earlier require a tree lookup
                 * because deleted LNs may still be reachable through their BIN
                 * parents.
                 */
                if (!isObsolete &&
                    isLN &&
                    reader.isLNDeleted() &&
                    fileLogVersion > 2) {
                    /* Deleted LNs are always obsolete. */
                    isObsolete = true;
                }

                /* Skip known obsolete nodes. */
                if (isObsolete) {
                    /* Count obsolete stats. */
                    if (!calcUtilizationOnly) {
                        if (isLN) {
                            nLNsObsoleteThisRun++;
                        } else if (isBINDelta) {
                            nBINDeltasObsoleteThisRun++;
                        } else if (isIN) {
                            nINsObsoleteThisRun++;
                        }
                    }
                    /* Update the pending DB set for obsolete entries. */
                    if (checkPendingDbSet != null && dbId != null) {
                        checkPendingDbSet.add(dbId);
                    }
                    /* Count utilization for obsolete entry. */
                    reader.countObsolete();
                    continue;
                }

                /*
                 * If we are only calculating utilization, do not process
                 * non-obsolete entries.
                 */
                if (calcUtilizationOnly) {
                    continue;
                }

                /* Evict before processing each entry. */
                if (Cleaner.DO_CRITICAL_EVICTION) {
                    env.daemonEviction(true /*backgroundIO*/);
                }

                /*
                 * The entry is not known to be obsolete -- process it now.
                 * Note that lookAheadCache is non-null here because the
                 * calcUtilizationOnly case was skipped via continue above.
                 */
                if (isLN) {
                    final LNLogEntry lnEntry = reader.getLNLogEntry();
                    lnEntry.postFetchInit(db);
                    final LN targetLN = lnEntry.getLN();
                    final byte[] key = lnEntry.getKey();

                    lookAheadCache.add
                        (Long.valueOf(DbLsn.getFileOffset(logLsn)),
                         new LNInfo(targetLN, dbId, key));

                    if (lookAheadCache.isFull()) {
                        processLN(fileNum, location, lookAheadCache, dbCache);
                    }

                    /*
                     * Process pending LNs before proceeding in order to
                     * prevent the pending list from growing too large.
                     */
                    nProcessedLNs += 1;
                    if (nProcessedLNs % PROCESS_PENDING_EVERY_N_LNS == 0) {
                        cleaner.processPending();
                    }
                } else if (isIN) {
                    final IN targetIN = reader.getIN(db);
                    targetIN.setDatabase(db);
                    processIN(targetIN, db, logLsn);
                } else if (isBINDelta) {
                    final BINDelta delta = reader.getBINDelta();
                    processBINDelta(delta, db, logLsn);
                } else if (isDbTree) {
                    env.rewriteMapTreeRoot(logLsn);
                } else {
                    assert false;
                }
            }

            /* Process remaining queued LNs. */
            if (lookAheadCache != null) {
                while (!lookAheadCache.isEmpty()) {
                    if (Cleaner.DO_CRITICAL_EVICTION) {
                        env.daemonEviction(true /*backgroundIO*/);
                    }
                    processLN(fileNum, location, lookAheadCache, dbCache);
                    /* Sleep if background read/write limit was exceeded. */
                    env.sleepAfterBackgroundIO();
                }
            }

            /* Update the pending DB set. */
            if (checkPendingDbSet != null) {
                for (Iterator<DatabaseId> i = checkPendingDbSet.iterator();
                     i.hasNext();) {
                    final DatabaseId pendingDbId = i.next();
                    final DatabaseImpl db = dbMapTree.getDb
                        (pendingDbId, cleaner.lockTimeout, dbCache);
                    cleaner.addPendingDB(db);
                }
            }

            /* Update reader stats. */
            nEntriesReadThisRun = reader.getNumRead();
            nRepeatIteratorReadsThisRun = reader.getNRepeatIteratorReads();
        } catch (ChecksumException e) {
            throw new EnvironmentFailureException
                (env, EnvironmentFailureReason.LOG_CHECKSUM, e);
        } finally {
            /* Subtract the overhead of this method from the budget. */
            budget.updateAdminMemoryUsage(0 - adjustMem);

            /* Release all cached DBs. */
            dbMapTree.releaseDbs(dbCache);
        }

        /* File is fully processed, update status information. */
        if (!calcUtilizationOnly) {
            fileSelector.addCleanedFile(fileNum, databases,
                                        reader.getLastVLSN(), budget);
        }

        return true;
    }
/**
* Unit testing. Simulates processing of a single LN.
*/
void testProcessLN(LN targetLN,
long logLsn,
byte[] key,
DatabaseId dbId,
Map<DatabaseId, DatabaseImpl> dbCache) {
LookAheadCache lookAheadCache = new LookAheadCache(1);
lookAheadCache.add
(Long.valueOf(DbLsn.getFileOffset(logLsn)),
new LNInfo(targetLN, dbId, key));
processLN(DbLsn.getFileNumber(logLsn), new TreeLocation(),
lookAheadCache, dbCache);
}
    /**
     * Processes the first LN in the look ahead cache and removes it from the
     * cache.  While the BIN is latched, look through the BIN for other LNs in
     * the cache; if any match, process them to avoid a tree search later.
     *
     * @param fileNum the file being cleaned; offsets in the cache are
     * relative to this file.
     *
     * @param location reusable scratch object for the tree search result.
     *
     * @param lookAheadCache cache of pending LNInfo keyed by file offset; the
     * entry processed (and any same-BIN matches) are removed.
     *
     * @param dbCache cache of DatabaseImpls; entries are not released here
     * since the caller releases the entire cache.
     */
    private void processLN(Long fileNum,
                           TreeLocation location,
                           LookAheadCache lookAheadCache,
                           Map<DatabaseId, DatabaseImpl> dbCache)
        throws DatabaseException {

        nLNsCleanedThisRun++;

        /* Get the first LN from the queue. */
        Long offset = lookAheadCache.nextOffset();
        LNInfo info = lookAheadCache.remove(offset);

        LN ln = info.getLN();
        byte[] key = info.getKey();

        long logLsn = DbLsn.makeLsn
            (fileNum.longValue(), offset.longValue());

        /*
         * Do not call releaseDb after this getDb, since the entire dbCache
         * will be released later.
         */
        DatabaseImpl db = env.getDbTree().getDb
            (info.getDbId(), cleaner.lockTimeout, dbCache);

        /* Status variables are used to generate debug tracing info. */
        boolean processedHere = true; // The LN was cleaned here.
        boolean obsolete = false;     // The LN is no longer in use.
        boolean completed = false;    // This method completed.

        BIN bin = null;
        try {
            Tree tree = db.getTree();
            assert tree != null;

            /* Find parent of this LN. */
            boolean parentFound = tree.getParentBINForChildLN
                (location, key, false /*splitsAllowed*/,
                 true /*findDeletedEntries*/, Cleaner.UPDATE_GENERATION);
            bin = location.bin;
            int index = location.index;

            if (!parentFound) {
                /* No BIN slot refers to this LN; it can be discarded. */
                nLNsDeadThisRun++;
                obsolete = true;
                completed = true;
                return;
            }

            /*
             * Now we're at the BIN parent for this LN.  If knownDeleted, LN is
             * deleted and can be purged.
             */
            if (bin.isEntryKnownDeleted(index)) {
                nLNsDeadThisRun++;
                obsolete = true;
                completed = true;
                return;
            }

            /* Process this LN that was found in the tree. */
            processedHere = false;
            processFoundLN(info, logLsn, bin.getLsn(index), bin, index);

            completed = true;

            /*
             * For all other non-deleted LNs in this BIN, lookup their LSN
             * in the LN queue and process any matches.
             */
            for (int i = 0; i < bin.getNEntries(); i += 1) {
                long binLsn = bin.getLsn(i);
                if (i != index &&
                    !bin.isEntryKnownDeleted(i) &&
                    !bin.isEntryPendingDeleted(i) &&
                    DbLsn.getFileNumber(binLsn) == fileNum.longValue()) {

                    Long myOffset = Long.valueOf(DbLsn.getFileOffset(binLsn));
                    LNInfo myInfo = lookAheadCache.remove(myOffset);

                    /* If the offset is in the cache, it's a match. */
                    if (myInfo != null) {
                        nLNQueueHitsThisRun++;
                        nLNsCleanedThisRun++;
                        processFoundLN(myInfo, binLsn, binLsn, bin, i);
                    }
                }
            }
            return;
        } finally {
            /* The BIN latch, if acquired by the tree search, is released. */
            if (bin != null) {
                bin.releaseLatch();
            }
            if (processedHere) {
                cleaner.logFine(Cleaner.CLEAN_LN, ln, logLsn,
                                completed, obsolete, false /*migrated*/);
            }
        }
    }
    /**
     * Processes an LN that was found in the tree.  Lock the LN's LSN and
     * then set the entry's MIGRATE flag if the LSN of the LN log entry is the
     * active LSN in the tree.
     *
     * @param info identifies the LN log entry.
     *
     * @param logLsn is the LSN of the log entry.
     *
     * @param treeLsn is the LSN found in the tree.
     *
     * @param bin is the BIN found in the tree; is latched on method entry and
     * exit.
     *
     * @param index is the BIN index found in the tree.
     */
    private void processFoundLN(LNInfo info,
                                long logLsn,
                                long treeLsn,
                                BIN bin,
                                int index)
        throws DatabaseException {

        LN lnFromLog = info.getLN();
        byte[] key = info.getKey();

        DatabaseImpl db = bin.getDatabase();
        boolean isTemporary = db.isTemporary();

        /* Status variables are used to generate debug tracing info. */
        boolean obsolete = false;   // The LN is no longer in use.
        boolean migrated = false;   // The LN was in use and is migrated.
        boolean lockDenied = false; // The LN lock was denied.
        boolean completed = false;  // This method completed.

        BasicLocker locker = null;
        try {
            Tree tree = db.getTree();
            assert tree != null;

            /*
             * If the tree and log LSNs are equal, then we can be fairly
             * certain that the log entry is current; in that case, it is
             * wasteful to lock the LN here if we will perform lazy migration
             * -- it is better to lock only once during lazy migration.  But if
             * the tree and log LSNs differ, it is likely that another thread
             * has updated or deleted the LN and the log LSN is now obsolete;
             * in this case we can avoid dirtying the BIN by checking for
             * obsoleteness here, which requires locking.  The latter case can
             * occur frequently if trackDetail is false.
             *
             * 1. If the LSN in the tree and in the log are the same, we will
             * attempt to migrate it.
             *
             * 2. If the LSN in the tree is < the LSN in the log, the log entry
             * is obsolete, because this LN has been rolled back to a previous
             * version by a txn that aborted.
             *
             * 3. If the LSN in the tree is > the LSN in the log, the log entry
             * is obsolete, because the LN was advanced forward by some
             * now-committed txn.
             *
             * 4. If the LSN in the tree is a null LSN, the log entry is
             * obsolete.  A slot can only have a null LSN if the record has
             * never been written to disk in a deferred write database, and
             * in that case the log entry must be for a past, deleted version
             * of that record.
             */
            if (lnFromLog.isDeleted() &&
                (treeLsn == logLsn) &&
                fileLogVersion <= 2) {

                /*
                 * SR 14583: After JE 2.0, deleted LNs are never found in the
                 * tree, since we can assume they're obsolete and correctly
                 * marked as such in the obsolete offset tracking.  JE 1.7.1
                 * and earlier did not use the pending deleted bit, so deleted
                 * LNs may still be reachable through their BIN parents.
                 */
                obsolete = true;
                nLNsDeadThisRun++;
                bin.setPendingDeleted(index);
            } else if (treeLsn == DbLsn.NULL_LSN) {

                /*
                 * Case 4: The LN in the tree is a never-written LN for a
                 * deferred-write db, so the LN in the file is obsolete.
                 */
                obsolete = true;
            } else if (treeLsn != logLsn && isTemporary) {

                /*
                 * Temporary databases are always non-transactional.  If the
                 * tree and log LSNs are different then we know that the logLsn
                 * is obsolete.  Even if the LN is locked, the tree cannot be
                 * restored to the logLsn because no abort is possible without
                 * a transaction.  We should consider a similar optimization in
                 * the future for non-transactional durable databases.
                 */
                nLNsDeadThisRun++;
                obsolete = true;
            } else if ((treeLsn != logLsn || !cleaner.lazyMigration) &&
                       !isTemporary) {

                /*
                 * Get a lock on the LN if the treeLsn and logLsn are different
                 * to determine definitively whether the logLsn is obsolete.
                 * We must also get a lock if we will migrate the LN now
                 * (lazyMigration is false and isTemporary is false).
                 *
                 * We can hold the latch on the BIN since we always attempt to
                 * acquire a non-blocking read lock.
                 */
                locker = BasicLocker.createBasicLocker(env, false /*noWait*/);
                /* Don't allow this short-lived lock to be preempted/stolen. */
                locker.setPreemptable(false);
                LockResult lockRet = locker.nonBlockingLock
                    (treeLsn, LockType.READ, false /*jumpAheadOfWaiters*/, db);
                if (lockRet.getLockGrant() == LockGrantType.DENIED) {

                    /*
                     * LN is currently locked by another Locker, so we can't
                     * assume anything about the value of the LSN in the bin.
                     */
                    nLNsLockedThisRun++;
                    lockDenied = true;
                } else if (treeLsn != logLsn) {
                    /* The LN is obsolete and can be purged. */
                    nLNsDeadThisRun++;
                    obsolete = true;
                }
            }

            /*
             * At this point either obsolete==true, lockDenied==true, or
             * treeLsn==logLsn.
             */
            if (!obsolete && !lockDenied) {
                assert treeLsn == logLsn;

                /*
                 * If lazyMigration is true, set the migrate flag and dirty
                 * the parent IN.  The evictor or checkpointer will migrate the
                 * LN later.  If lazyMigration is false, migrate the LN now.
                 *
                 * We have a lock on the LN if we are going to migrate it now,
                 * but not if we will set the migrate flag.
                 *
                 * When setting the migrate flag, also populate the target node
                 * so it does not have to be fetched when it is migrated, if
                 * the tree and log LSNs are equal and the target is not
                 * resident.  We must call postFetchInit to initialize MapLNs
                 * that have not been fully initialized yet [#13191].
                 *
                 * For temporary databases, do not rely on the LN migration
                 * mechanism because temporary databases are not checkpointed
                 * or recovered.  Instead, dirty the LN to ensure it is
                 * flushed before its parent is written.  Because we do not
                 * attempt to lock temporary database LNs (see above) we know
                 * that if it is non-obsolete, the tree and log LSNs are equal.
                 * We will always restore the LN to the BIN slot here, and
                 * always log the dirty LN when logging the BIN.
                 *
                 * Also for temporary databases, make both the target LN and
                 * the BIN or IN parent dirty.  Otherwise, when the BIN or IN
                 * is evicted in the future, it will be written to disk without
                 * flushing its dirty, migrated LNs. [#18227]
                 */
                if (bin.getTarget(index) == null) {
                    lnFromLog.postFetchInit(db, logLsn);
                    /* Ensure keys are transactionally correct. [#15704] */
                    bin.updateNode(index, lnFromLog, key /*lnSlotKey*/);
                }

                if (isTemporary) {
                    ((LN) bin.getTarget(index)).setDirty();
                    bin.setDirty(true);
                } else if (cleaner.lazyMigration) {
                    bin.setMigrate(index, true);
                    bin.setDirty(true);
                } else {
                    /* Migrate the LN now by logging a new version. */
                    LN targetLn = (LN) bin.getTarget(index);
                    assert targetLn != null;
                    long newLNLsn = targetLn.log
                        (env, db, bin.getKey(index), logLsn,
                         true /*backgroundIO*/,
                         Cleaner.getMigrationRepContext(targetLn));
                    bin.updateEntry(index, newLNLsn);
                    /* Evict LN if we populated it with the log LN. */
                    if (lnFromLog == targetLn) {
                        bin.updateNode(index, null, null);
                    }
                    /* Lock new LSN on behalf of existing lockers. */
                    CursorImpl.lockAfterLsnChange
                        (db, logLsn, newLNLsn, locker /*excludeLocker*/);
                }

                /*
                 * If the generation is zero, we fetched this BIN just for
                 * cleaning.
                 */
                if (PROHIBIT_DELTAS_WHEN_FETCHING &&
                    bin.getGeneration() == 0) {
                    bin.setProhibitNextDelta();
                }

                /*
                 * Update the generation so that the BIN is not evicted
                 * immediately.  This allows the cleaner to fill in as many
                 * entries as possible before eviction, as to-be-cleaned
                 * files are processed.
                 */
                bin.setGeneration(CacheMode.DEFAULT);

                nLNsMarkedThisRun++;
                migrated = true;
            }
            completed = true;
        } finally {
            if (locker != null) {
                locker.operationEnd();
            }

            /*
             * If a write lock is held, it is likely that the log LSN will
             * become obsolete.  It is more efficient to process this via the
             * pending list than to set the MIGRATE flag, dirty the BIN, and
             * cause the BIN to be logged unnecessarily.
             */
            if (completed && lockDenied) {
                assert !isTemporary;

                /*
                 * We could associate the pending LN with either treeLsn or
                 * logLsn, we just need a convention to follow.  When
                 * processing the pending LN, we always check to see if the LSN
                 * has changed after we latch the BIN.  For consistency with
                 * other places that call addPendingLN, we use treeLsn here.
                 */
                fileSelector.addPendingLN(treeLsn, lnFromLog, db.getId(), key);
            }

            cleaner.logFine(Cleaner.CLEAN_LN, lnFromLog, logLsn, completed,
                            obsolete, migrated);
        }
    }
    /**
     * If this BINDelta is still in use in the in-memory tree, dirty the
     * associated BIN.  The next checkpoint will log a new delta or a full
     * version, which will make this delta obsolete.
     *
     * @param deltaClone the delta read from the log file being cleaned.
     *
     * @param db the database the delta belongs to.
     *
     * @param logLsn the LSN of the delta's log entry.
     */
    private void processBINDelta(BINDelta deltaClone,
                                 DatabaseImpl db,
                                 long logLsn) {

        nBINDeltasCleanedThisRun++;

        /*
         * Search Btree for the BIN associated with this delta.
         *
         * A potential optimization is to use a by-level search to find the
         * BIN's parent (getParentINForChildIN may need to be enhanced to do
         * this), then fetch the delta, then reconstitute BIN using the inClone
         * if indeed it is a delta and its lastFullVersion is LTE the log LSN.
         * This avoids fetching a BIN that does not need migration.
         */
        final byte[] searchKey = deltaClone.getSearchKey();
        final BIN treeBin = (BIN) db.getTree().search
            (searchKey, Tree.SearchType.NORMAL, null /*binBoundary*/,
             Cleaner.UPDATE_GENERATION, null /*searchComparator*/);

        if (treeBin == null) {
            /* BIN for this delta is no longer in the tree. */
            nBINDeltasDeadThisRun++;
            return;
        }

        /* Tree BIN is non-null and latched; release in finally below. */
        try {
            final long treeLsn = treeBin.getLastLoggedVersion();
            if (treeLsn == DbLsn.NULL_LSN) {
                /* Current version was never logged. */
                nBINDeltasDeadThisRun++;
                return;
            }

            final int cmp = DbLsn.compareTo(treeLsn, logLsn);
            if (cmp > 0) {
                /* Log entry is obsolete. */
                nBINDeltasDeadThisRun++;
                return;
            }

            /*
             * Log entry is same or newer than what's in the tree.  Dirty the
             * BIN and let the checkpoint write it out.  There is no need to
             * prohibit a delta when the BIN is next logged (as is done when
             * migrating full INs) because logging a new delta will obsolete
             * this delta.
             */
            treeBin.setDirty(true);
            nBINDeltasMigratedThisRun++;
        } finally {
            treeBin.releaseLatch();
        }
    }
/**
 * If an IN is still in use in the in-memory tree, dirty it. The checkpoint
 * invoked at the end of the cleaning run will end up rewriting it.
 *
 * @param inClone copy of the IN read from the log file being cleaned.
 * @param db the database the IN belongs to.
 * @param logLsn LSN at which the clone was read.
 */
private void processIN(IN inClone,
                       DatabaseImpl db,
                       long logLsn)
    throws DatabaseException {

    boolean obsolete = false;
    boolean dirtied = false;
    boolean completed = false;

    try {
        nINsCleanedThisRun++;

        final Tree tree = db.getTree();
        assert tree != null;

        final IN resident = findINInTree(tree, db, inClone, logLsn);
        if (resident == null) {
            /* The IN is no longer in the tree; nothing to migrate. */
            nINsDeadThisRun++;
            obsolete = true;
        } else {
            /*
             * The IN is still in the tree. Dirty it so checkpoint or
             * eviction writes it out. Prohibit the next delta, since
             * the original version must be made obsolete.
             */
            nINsMigratedThisRun++;
            resident.setDirty(true);
            resident.setProhibitNextDelta();
            resident.releaseLatch();
            dirtied = true;
        }

        completed = true;
    } finally {
        /* Trace the outcome even if findINInTree threw. */
        cleaner.logFine(Cleaner.CLEAN_IN, inClone, logLsn, completed,
                        obsolete, dirtied);
    }
}
/**
* Given a clone of an IN that has been taken out of the log, try to find
* it in the tree and verify that it is the current one in the log.
* Returns the node in the tree if it is found and it is current re: LSN's.
* Otherwise returns null if the clone is not found in the tree or it's not
* the latest version. Caller is responsible for unlatching the returned
* IN.
*
* Latching: inClone is latched only for the duration of the parent search;
* the parent latch (taken by getParentINForChildIN) is released in the
* finally block, after the returned IN has been latched.
*/
private IN findINInTree(Tree tree,
DatabaseImpl db,
IN inClone,
long logLsn)
throws DatabaseException {
/* Check if inClone is the root. */
if (inClone.isDbRoot()) {
IN rootIN = isRoot(tree, db, inClone, logLsn);
if (rootIN == null) {
/*
* inClone is a root, but no longer in use. Return now, because
* a call to tree.getParentNode will return something
* unexpected since it will try to find a parent.
*/
return null;
} else {
return rootIN;
}
}
/* It's not the root. Can we find it, and if so, is it current? */
inClone.latch(Cleaner.UPDATE_GENERATION);
SearchResult result = null;
try {
/* getParentINForChildIN releases inClone's latch. */
result = tree.getParentINForChildIN
(inClone,
true, // requireExactMatch
Cleaner.UPDATE_GENERATION,
inClone.getLevel(),
null); // trackingList
if (!result.exactParentFound) {
return null;
}
/* Note that treeLsn may be for a BINDelta, see below. */
long treeLsn = result.parent.getLsn(result.index);
/*
* The IN in the tree is a never-written IN for a DW db so the IN
* in the file is obsolete. [#15588]
*/
if (treeLsn == DbLsn.NULL_LSN) {
return null;
}
/*
* If tree and log LSNs are equal, then we've found the exact IN we
* read from the log. We know the treeLsn is not for a BINDelta,
* because it is equal to LSN of the IN (or BIN) we read from the
* log. To avoid a fetch, we can place the inClone in the tree if
* it is not already resident.
*/
if (treeLsn == logLsn) {
IN in = (IN) result.parent.getTarget(result.index);
if (in == null) {
/* Slot is not resident: install the clone we already have. */
in = inClone;
in.postFetchInit(db, logLsn);
result.parent.updateNode
(result.index, in, null /*lnSlotKey*/);
}
in.latch(Cleaner.UPDATE_GENERATION);
return in;
}
/*
* If the tree and log LSNs are unequal, then we must get the full
* version LSN in case the tree LSN is actually for a BINDelta.
* The simplest way to do that is to fetch the IN in the tree.
*
* A potential optimization is to fetch the delta first and then
* reconstitute the BIN using the inClone if the delta's
* lastFullVersion is equal to the log LSN. This avoids fetching
* the inClone we have in hand, or a later full version that does
* not need migration.
*/
if (inClone.isBIN()) {
/*
* getParentINForChildIN takes exclusive latches above, so we
* use fetchTargetWithExclusiveLatch.
*/
final IN in = (IN) result.parent.fetchTargetWithExclusiveLatch
(result.index);
/*
* The fetched BIN is used only to obtain its full-version LSN;
* it stays protected by the parent latch held until finally.
*/
treeLsn = in.getLastFullVersion();
}
/* Now compare LSNs, since we know treeLsn is the full version. */
final int compareVal = DbLsn.compareTo(treeLsn, logLsn);
if (compareVal > 0) {
/* Log entry is obsolete. */
return null;
}
/*
* Log entry is same or newer than what's in the tree.
* getParentINForChildIN takes exclusive latches above, so we use
* fetchTargetWithExclusiveLatch.
*/
final IN in = (IN) result.parent.fetchTargetWithExclusiveLatch
(result.index);
in.latch(Cleaner.UPDATE_GENERATION);
return in;
} finally {
/* Release the parent latch taken by getParentINForChildIN. */
if ((result != null) && (result.exactParentFound)) {
result.parent.releaseLatch();
}
}
}
/**
 * Returns the current root IN (latched) if the inClone is the current
 * root in the tree; returns null otherwise. (The previous comment here
 * had the condition inverted.)
 */
private static class RootDoWork implements WithRootLatched {
    private final DatabaseImpl db;
    private final IN inClone;
    private final long logLsn;

    RootDoWork(DatabaseImpl db, IN inClone, long logLsn) {
        this.db = db;
        this.inClone = inClone;
        this.logLsn = logLsn;
    }

    /**
     * @return the latched root IN when inClone is the current root and
     * the root LSN is LTE the log LSN; null otherwise. Caller is
     * responsible for unlatching a non-null return value.
     */
    public IN doWork(ChildReference root)
        throws DatabaseException {

        if (root == null ||
            (root.getLsn() == DbLsn.NULL_LSN)) { // deferred write root
            return null;
        }

        /*
         * Fetch the root target once and reuse it. Previously it was
         * fetched twice: once for the node-id comparison and again when
         * returning it below.
         */
        final IN rootIN = (IN) root.fetchTarget(db, null);
        if (rootIN.getNodeId() != inClone.getNodeId()) {
            return null;
        }

        /*
         * A root LSN less than the log LSN must be an artifact of when we
         * didn't properly propagate the logging of the rootIN up to the
         * root ChildReference. We still do this for compatibility with
         * old log versions but may be able to remove it in the future.
         */
        if (DbLsn.compareTo(root.getLsn(), logLsn) <= 0) {
            rootIN.latch(Cleaner.UPDATE_GENERATION);
            return rootIN;
        }
        return null;
    }
}
/**
 * Check if the cloned IN is the same node as the root in tree. Return the
 * real root if it is, null otherwise. If non-null is returned, the
 * returned IN (the root) is latched -- caller is responsible for
 * unlatching it.
 */
private IN isRoot(Tree tree, DatabaseImpl db, IN inClone, long lsn)
    throws DatabaseException {

    /* Delegate the comparison to RootDoWork under a shared root latch. */
    return tree.withRootLatchedShared(new RootDoWork(db, inClone, lsn));
}
/**
 * Reset per-run counters.
 *
 * Every per-run field that accumulatePerRunCounters adds into the
 * cumulative cleaner stats must be zeroed here; otherwise counts from
 * earlier runs are carried over and double-counted.
 */
private void resetPerRunCounters() {
    nINsObsoleteThisRun = 0;
    nINsCleanedThisRun = 0;
    nINsDeadThisRun = 0;
    nINsMigratedThisRun = 0;
    /*
     * Bug fix: the BIN-delta counters were previously never reset, so
     * each run re-added the totals of all prior runs to the cleaner's
     * nBINDeltas* stats in accumulatePerRunCounters.
     */
    nBINDeltasObsoleteThisRun = 0;
    nBINDeltasCleanedThisRun = 0;
    nBINDeltasDeadThisRun = 0;
    nBINDeltasMigratedThisRun = 0;
    nLNsObsoleteThisRun = 0;
    nLNsCleanedThisRun = 0;
    nLNsDeadThisRun = 0;
    nLNsMigratedThisRun = 0;
    nLNsMarkedThisRun = 0;
    nLNQueueHitsThisRun = 0;
    nLNsLockedThisRun = 0;
    nEntriesReadThisRun = 0;
    nRepeatIteratorReadsThisRun = 0;
}
/**
* Add per-run counters to total counters.
*
* Adds every *ThisRun field into the cleaner's cumulative stats; the
* per-run fields are expected to be zeroed by resetPerRunCounters before
* the next run.
*/
private void accumulatePerRunCounters() {
cleaner.nINsObsolete.add(nINsObsoleteThisRun);
cleaner.nINsCleaned.add(nINsCleanedThisRun);
cleaner.nINsDead.add(nINsDeadThisRun);
cleaner.nINsMigrated.add(nINsMigratedThisRun);
cleaner.nBINDeltasObsolete.add(nBINDeltasObsoleteThisRun);
cleaner.nBINDeltasCleaned.add(nBINDeltasCleanedThisRun);
cleaner.nBINDeltasDead.add(nBINDeltasDeadThisRun);
cleaner.nBINDeltasMigrated.add(nBINDeltasMigratedThisRun);
cleaner.nLNsObsolete.add(nLNsObsoleteThisRun);
cleaner.nLNsCleaned.add(nLNsCleanedThisRun);
cleaner.nLNsDead.add(nLNsDeadThisRun);
cleaner.nLNsMigrated.add(nLNsMigratedThisRun);
cleaner.nLNsMarked.add(nLNsMarkedThisRun);
cleaner.nLNQueueHits.add(nLNQueueHitsThisRun);
cleaner.nLNsLocked.add(nLNsLockedThisRun);
cleaner.nRepeatIteratorReads.add(nRepeatIteratorReadsThisRun);
}
/**
 * A cache of LNInfo by LSN offset. Used to hold a set of LNs that are
 * to be processed. Keeps track of memory used, and when full (over
 * budget) the next offset should be queried and removed.
 */
private static class LookAheadCache {

    /* Entries sorted by file offset, so nextOffset() yields the lowest. */
    private final SortedMap<Long,LNInfo> infoByOffset;
    private final int memLimit;
    private int memUsed;

    LookAheadCache(int lookAheadCacheSize) {
        infoByOffset = new TreeMap<Long,LNInfo>();
        memLimit = lookAheadCacheSize;
        /* Account for the map itself up front. */
        memUsed = MemoryBudget.TREEMAP_OVERHEAD;
    }

    boolean isEmpty() {
        return infoByOffset.isEmpty();
    }

    boolean isFull() {
        return memUsed >= memLimit;
    }

    /** Lowest cached offset; the cache must be non-empty. */
    Long nextOffset() {
        return infoByOffset.firstKey();
    }

    void add(Long lsnOffset, LNInfo info) {
        infoByOffset.put(lsnOffset, info);
        memUsed += info.getMemorySize() +
                   MemoryBudget.TREEMAP_ENTRY_OVERHEAD;
    }

    LNInfo remove(Long offset) {
        final LNInfo info = infoByOffset.remove(offset);
        if (info != null) {
            /* Give back exactly what add() charged for this entry. */
            memUsed -= info.getMemorySize() +
                       MemoryBudget.TREEMAP_ENTRY_OVERHEAD;
        }
        return info;
    }
}
}