/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 2002, 2011 Oracle and/or its affiliates. All rights reserved.
*
*/
package com.sleepycat.je.incomp;
import static com.sleepycat.je.incomp.INCompStatDefinition.GROUP_DESC;
import static com.sleepycat.je.incomp.INCompStatDefinition.GROUP_NAME;
import static com.sleepycat.je.incomp.INCompStatDefinition.INCOMP_CURSORS_BINS;
import static com.sleepycat.je.incomp.INCompStatDefinition.INCOMP_DBCLOSED_BINS;
import static com.sleepycat.je.incomp.INCompStatDefinition.INCOMP_NON_EMPTY_BINS;
import static com.sleepycat.je.incomp.INCompStatDefinition.INCOMP_PROCESSED_BINS;
import static com.sleepycat.je.incomp.INCompStatDefinition.INCOMP_QUEUE_SIZE;
import static com.sleepycat.je.incomp.INCompStatDefinition.INCOMP_SPLIT_BINS;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import com.sleepycat.je.CacheMode;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.StatsConfig;
import com.sleepycat.je.cleaner.LocalUtilizationTracker;
import com.sleepycat.je.config.EnvironmentParams;
import com.sleepycat.je.dbi.DatabaseId;
import com.sleepycat.je.dbi.DatabaseImpl;
import com.sleepycat.je.dbi.DbTree;
import com.sleepycat.je.dbi.EnvironmentImpl;
import com.sleepycat.je.latch.LatchSupport;
import com.sleepycat.je.tree.BIN;
import com.sleepycat.je.tree.BINReference;
import com.sleepycat.je.tree.CursorsExistException;
import com.sleepycat.je.tree.IN;
import com.sleepycat.je.tree.NodeNotEmptyException;
import com.sleepycat.je.tree.Tree;
import com.sleepycat.je.tree.Tree.SearchType;
import com.sleepycat.je.utilint.DaemonThread;
import com.sleepycat.je.utilint.LoggerUtils;
import com.sleepycat.je.utilint.LongStat;
import com.sleepycat.je.utilint.StatGroup;
import com.sleepycat.je.utilint.TestHook;
import com.sleepycat.je.utilint.TestHookExecute;
/**
* JE compression consists of removing deleted entries from BINs, and pruning
* empty IN/BINs from the tree which is also called a reverse split.
*
* One of the reasons compression is treated specially is that slot compression
* cannot be performed inline as part of a delete operation. When we delete an
* LN, a cursor is always present on the LN. The API dictates that the cursor
* will remain positioned on the deleted record. In addition, if the deleting
* transaction aborts we must restore the slot and the possibility of a split
* during an abort is something we wish to avoid; for this reason, compression
* will not occur if the slot's LSN is locked. In principle, slot compression
* could be performed during transaction commit, but that would be expensive
* because a Btree lookup would be required, and this would negatively impact
* operation latency. For all these reasons, slot compression is performed
* after the delete operation is complete and committed, and not in the thread
* performing the operation or transaction commit.
*
* Compression is of two types:
*
* + "Queued compression" is carried out by the INCompressor daemon thread.
* Both slot compression and pruning are performed.
*
* + "Lazy compression" is carried out as part of logging a BIN by certain
* operations (namely checkpointing and eviction). Only slot compression is
* performed by lazy compression, not pruning.
*
* The use of BINDeltas has a big impact on slot compression because slots
* cannot be compressed until we know that a full BIN will next be logged. If
* a slot were compressed prior to logging a BINDelta, the record of the
* compression would be lost and the slot would "reappear" when the BIN is
* reconstituted; therefore, this is not permitted.
*
* Queued compression prior to logging a BINDelta is also wasteful because the
* dequeued entry cannot be processed. Therefore, lazy compression is used
* when a BINDelta will next be logged, and queued compression is used only
* when a full BIN will next be logged. Because, in general, BINDeltas are
* logged more often than BINs, lazy compression is used for slot compression
* more often than queued compression.
*
* You may wonder, since lazy compression is used most of the time for slot
* compression, why use queued compression for slot compression at all? Queued
* compression is useful for slot compression for the following reasons:
*
* + When a cursor is on a BIN, queuing has an advantage over lazy compression.
* If we can't compress during logging because of a cursor, we have to log
* anyway, and we must delay compression and log again later. If we can't
* compress when processing a queue entry, we requeue it and retry later,
* which increases the chances that we will be able to compress before
* logging.
*
* + The code to process a queue entry must do slot compression anyway, even if
* we only want to prune the BIN. We have to account for the case where all
* slots are deleted but not yet compressed. So the code to process the
* queue entry could not be simplified even if we were to decide not to queue
* entries for slot compression.
*
* + Because BINDeltas are not used for DeferredWrite mode, queued compression
* is much more appropriate and efficient in this mode.
*
* The mainstream algorithm for compression is as follows.
*
* 1. When a delete operation occurs (CursorImpl.delete) we call
* Locker.addDeleteInfo, which determines whether a BINDelta will next be
* logged (BIN.shouldLogDelta). If so, it does nothing, meaning that lazy
* compression will be used. If not (a full BIN will next be logged), it
* adds a BINReference to the Locker.deleteInfo map.
*
* 2. When the operation is successful and the Locker releases its locks, it
* copies the BINReferences from the deleteInfo map to the compressor queue.
* For a transaction this happens at commit (Txn.commit). For a
* non-transaction locker this happens when the cursor moves or is closed
* (BasicLocker.releaseNonTxnLocks).
*
* 3. The INCompressor thread processes its queue entries periodically, based
* on EnvironmentConfig.COMPRESSOR_WAKEUP_INTERVAL. For each BINReference
* that was queued, it tries to compress all deleted slots (BIN.compress).
* If the BIN is then empty, it prunes the Btree (Tree.delete). If a slot
* cannot be compressed or an empty BIN cannot be pruned because a slot's
* LSN is locked or a cursor is present, the entry is requeued for retry.
*
* 4. Lazy compression occurs via the checkpointer and evictor. When logging
* an IN, these components pass true for the allowCompress parameter. If a
* full BIN is logged (BIN.shouldLogDelta returns false), the lazyCompress
* method is called by BIN.beforeLog. lazyCompress will attempt to compress
* all deleted slots (BIN.compress). If the BIN is then empty, it will
* queue a BINReference so that pruning will occur later. If a slot cannot
* be compressed (because the LSN is locked or a cursor is present), the
* BIN.afterLog method will queue a BINReference. In this last case, two
* full BINs will be logged consecutively.
*
* Special cases are as follows.
*
* A. Before performing a split, we call lazyCompress in order to avoid the
* split if possible (Tree.searchSubTreeUntilSplit). It is important to
* avoid splitting when compression is deferred due to BINDeltas.
*
* B. When we undo an LN insertion (via abort, rollback or recovery undo in
* RecoveryManager.undo), or redo an LN deletion during recovery
* (RecoveryManager.redo), we queue a BINReference if a full BIN will next
* be logged (BIN.queueSlotDeletion). This mimics what happens during a
* mainstream delete operation.
*/
public class INCompressor extends DaemonThread {
private static final boolean DEBUG = false;
private EnvironmentImpl env;
private final long lockTimeout;
/* stats */
private StatGroup stats;
private LongStat splitBins;
private LongStat dbClosedBins;
private LongStat cursorsBins;
private LongStat nonEmptyBins;
private LongStat processedBins;
private LongStat compQueueSize;
/* per-run stats */
private int splitBinsThisRun = 0;
private int dbClosedBinsThisRun = 0;
private int cursorsBinsThisRun = 0;
private int nonEmptyBinsThisRun = 0;
private int processedBinsThisRun = 0;
/*
* The following stats are not kept per run, because they're set by
* multiple threads doing lazy compression. They are debugging aids; it
* didn't seem like a good idea to add synchronization to the general path.
*/
private int lazyProcessed = 0;
private int wokenUp = 0;
/*
* Store logical references to BINs that have deleted entries and are
* candidates for compaction.
*/
private Map<Long, BINReference> binRefQueue;
private final Object binRefQueueSync;
/* For unit tests */
private TestHook beforeFlushTrackerHook; // [#15528]
public INCompressor(EnvironmentImpl env, long waitTime, String name) {
super(waitTime, name, env);
this.env = env;
lockTimeout = env.getConfigManager().getDuration
(EnvironmentParams.COMPRESSOR_LOCK_TIMEOUT);
binRefQueue = new HashMap<Long, BINReference>();
binRefQueueSync = new Object();
/* Do the stats definitions. */
stats = new StatGroup(GROUP_NAME, GROUP_DESC);
splitBins = new LongStat(stats, INCOMP_SPLIT_BINS);
dbClosedBins = new LongStat(stats, INCOMP_DBCLOSED_BINS);
cursorsBins = new LongStat(stats, INCOMP_CURSORS_BINS);
nonEmptyBins = new LongStat(stats, INCOMP_NON_EMPTY_BINS);
processedBins = new LongStat(stats, INCOMP_PROCESSED_BINS);
compQueueSize = new LongStat(stats, INCOMP_QUEUE_SIZE);
}
synchronized public void clearEnv() {
env = null;
}
/* For unit testing only. */
public void setBeforeFlushTrackerHook(TestHook hook) {
beforeFlushTrackerHook = hook;
}
public synchronized void verifyCursors()
throws DatabaseException {
/*
* Environment may have been closed. If so, then our job here is done.
*/
if (env.isClosed()) {
return;
}
/*
* Use a snapshot to verify the cursors. This way we don't have to
* hold a latch while verify takes locks.
*/
List<BINReference> queueSnapshot = null;
synchronized (binRefQueueSync) {
queueSnapshot = new ArrayList<BINReference>(binRefQueue.values());
}
/*
* Use local caching to reduce DbTree.getDb overhead. Do not call
* releaseDb after each getDb, since the entire dbCache will be
* released at the end.
*/
DbTree dbTree = env.getDbTree();
Map<DatabaseId, DatabaseImpl> dbCache =
new HashMap<DatabaseId, DatabaseImpl>();
try {
Iterator<BINReference> it = queueSnapshot.iterator();
while (it.hasNext()) {
BINReference binRef = it.next();
DatabaseImpl db = dbTree.getDb
(binRef.getDatabaseId(), lockTimeout, dbCache);
BIN bin = searchForBIN(db, binRef);
if (bin != null) {
bin.verifyCursors();
bin.releaseLatch();
}
}
} finally {
dbTree.releaseDbs(dbCache);
}
}
public int getBinRefQueueSize() {
int size = 0;
synchronized (binRefQueueSync) {
size = binRefQueue.size();
}
return size;
}
/*
* There are multiple flavors of the addBin*ToQueue methods. All allow
* the caller to specify whether the daemon should be notified. Currently
* no callers proactively notify, and we rely on lazy compression and
* the daemon timebased wakeup to process the queue.
*/
/**
* Adds the BIN to the queue if the BIN is not already in the queue.
*/
public void addBinToQueue(BIN bin, boolean doWakeup) {
synchronized (binRefQueueSync) {
addBinToQueueAlreadyLatched(bin);
}
if (doWakeup) {
wakeup();
}
}
/**
* Adds the BINReference to the queue if the BIN is not already in the
* queue, or adds the deleted keys to an existing entry if one exists.
*/
public void addBinRefToQueue(BINReference binRef, boolean doWakeup) {
synchronized (binRefQueueSync) {
addBinRefToQueueAlreadyLatched(binRef);
}
if (doWakeup) {
wakeup();
}
}
/**
* Adds an entire collection of BINReferences to the queue at once. Use
* this to avoid latching for each add.
*/
public void addMultipleBinRefsToQueue(Collection<BINReference> binRefs,
boolean doWakeup) {
synchronized (binRefQueueSync) {
Iterator<BINReference> it = binRefs.iterator();
while (it.hasNext()) {
BINReference binRef = it.next();
addBinRefToQueueAlreadyLatched(binRef);
}
}
if (doWakeup) {
wakeup();
}
}
/**
* Adds the BINReference with the latch held.
*/
private void addBinRefToQueueAlreadyLatched(BINReference binRef) {
final Long node = Long.valueOf(binRef.getNodeId());
if (binRefQueue.containsKey(node)) {
return;
}
binRefQueue.put(node, binRef);
}
/**
* Adds the BIN with the latch held.
*/
private void addBinToQueueAlreadyLatched(BIN bin) {
final Long node = Long.valueOf(bin.getNodeId());
if (binRefQueue.containsKey(node)) {
return;
}
binRefQueue.put(node, bin.createReference());
}
public boolean exists(long nodeId) {
synchronized (binRefQueueSync) {
return binRefQueue.containsKey(nodeId);
}
}
/**
* Return stats
*/
public StatGroup loadStats(StatsConfig config) {
compQueueSize.set((long) getBinRefQueueSize());
if (DEBUG) {
System.out.println("lazyProcessed = " + lazyProcessed);
System.out.println("wokenUp=" + wokenUp);
}
if (config.getClear()) {
lazyProcessed = 0;
wokenUp = 0;
}
return stats.cloneGroup(config.getClear());
}
/**
* Return the number of retries when a deadlock exception occurs.
*/
@Override
protected long nDeadlockRetries() {
return env.getConfigManager().getInt
(EnvironmentParams.COMPRESSOR_RETRY);
}
@Override
public synchronized void onWakeup()
throws DatabaseException {
if (env.isClosed()) {
return;
}
wokenUp++;
doCompress();
}
/**
* The real work to doing a compress. This may be called by the compressor
* thread or programatically.
*/
public synchronized void doCompress()
throws DatabaseException {
/*
* Make a snapshot of the current work queue so the compressor thread
* can safely iterate over the queue. Note that this impacts lazy
* compression, because it lazy compressors will not see BINReferences
* that have been moved to the snapshot.
*/
Map<Long, BINReference> queueSnapshot = null;
int binQueueSize = 0;
synchronized (binRefQueueSync) {
binQueueSize = binRefQueue.size();
if (binQueueSize > 0) {
queueSnapshot = binRefQueue;
binRefQueue = new HashMap<Long, BINReference>();
}
}
/* There is work to be done. */
if (binQueueSize > 0) {
resetPerRunCounters();
LoggerUtils.fine(logger, envImpl,
"InCompress.doCompress called, queue size: " +
binQueueSize);
assert LatchSupport.countLatchesHeld() == 0;
/*
* Compressed entries must be counted as obsoleted. A separate
* tracker is used to accumulate tracked obsolete info so it can be
* added in a single call under the log write latch. We log the
* info for deleted subtrees immediately because we don't process
* deleted IN entries during recovery; this reduces the chance of
* lost info.
*/
LocalUtilizationTracker localTracker =
new LocalUtilizationTracker(env);
/* Use local caching to reduce DbTree.getDb overhead. */
Map<DatabaseId, DatabaseImpl> dbCache =
new HashMap<DatabaseId, DatabaseImpl>();
DbTree dbTree = env.getDbTree();
BINSearch binSearch = new BINSearch();
try {
Iterator<BINReference> it = queueSnapshot.values().iterator();
while (it.hasNext()) {
if (env.isClosed()) {
return;
}
BINReference binRef = it.next();
if (!findDBAndBIN(binSearch, binRef, dbTree, dbCache)) {
/*
* Either the db is closed, or the BIN doesn't exist.
* Don't process this BINReference.
*/
continue;
}
/* Compress deleted slots and prune if possible. */
compressBin(binSearch.db, binSearch.bin, binRef,
localTracker);
}
/* SR [#11144]*/
assert TestHookExecute.doHookIfSet(beforeFlushTrackerHook);
/*
* Count obsolete nodes and write out modified file summaries
* for recovery. All latches must have been released.
*/
env.getUtilizationProfile().flushLocalTracker(localTracker);
} finally {
dbTree.releaseDbs(dbCache);
assert LatchSupport.countLatchesHeld() == 0;
accumulatePerRunCounters();
}
}
}
/**
* Compresses a single BIN and then deletes the BIN if it is empty.
*
* @param bin is latched when this method is called, and unlatched when it
* returns.
*/
private void compressBin(DatabaseImpl db,
BIN bin,
BINReference binRef,
LocalUtilizationTracker localTracker) {
/* Safe to get identifier keys; bin is latched. */
final byte[] idKey = bin.getIdentifierKey();
boolean empty = (bin.getNEntries() == 0);
try {
if (!empty) {
/*
* If a delta will be logged, do not compress, check for
* emptiness or re-add the entry to the queue.
*
* We strive not to add a slot to the queue when we will log a
* delta. However, it is possible that an entry is added, or
* that an entry is not cleared by lazy compression prior to
* logging a full BIN. Clean-up for such queue entries is
* here.
*/
if (bin.shouldLogDelta()) {
return;
}
/* If there are cursors on the BIN, requeue and try later. */
if (bin.nCursors() > 0) {
addBinRefToQueue(binRef, false);
cursorsBinsThisRun++;
return;
}
/* If compression is incomplete, requeue and try later. */
if (!bin.compress(localTracker)) {
addBinRefToQueue(binRef, false);
return;
}
/* After compression the BIN may be empty. */
empty = (bin.getNEntries() == 0);
}
} finally {
bin.releaseLatch();
}
/* After releasing the latch, prune the BIN if it is empty. */
if (empty) {
pruneBIN(db, binRef, idKey, localTracker);
}
}
/**
* If the target BIN is empty, attempt to remove the empty branch of the
* tree.
*/
private void pruneBIN(DatabaseImpl dbImpl,
BINReference binRef,
byte[] idKey,
LocalUtilizationTracker localTracker) {
try {
Tree tree = dbImpl.getTree();
tree.delete(idKey, localTracker);
processedBinsThisRun++;
} catch (NodeNotEmptyException NNEE) {
/*
* Something was added to the node since the point when the
* deletion occurred; we can't prune, and we can throw away this
* BINReference.
*/
nonEmptyBinsThisRun++;
} catch (CursorsExistException e) {
/* If there are cursors in the way of the delete, retry later. */
addBinRefToQueue(binRef, false);
cursorsBinsThisRun++;
}
}
/**
* Search the tree for the BIN that corresponds to this BINReference.
*
* @param binRef the BINReference that indicates the bin we want.
*
* @return the BIN that corresponds to this BINReference. The
* node is latched upon return. Returns null if the BIN can't be found.
*/
public BIN searchForBIN(DatabaseImpl db, BINReference binRef) {
return (BIN) db.getTree().search
(binRef.getKey(), SearchType.NORMAL, null, CacheMode.UNCHANGED,
null /*keyComparator*/);
}
/**
* Reset per-run counters.
*/
private void resetPerRunCounters() {
splitBinsThisRun = 0;
dbClosedBinsThisRun = 0;
cursorsBinsThisRun = 0;
nonEmptyBinsThisRun = 0;
processedBinsThisRun = 0;
}
private void accumulatePerRunCounters() {
splitBins.add(splitBinsThisRun);
dbClosedBins.add(dbClosedBinsThisRun);
cursorsBins.add(cursorsBinsThisRun);
nonEmptyBins.add(nonEmptyBinsThisRun);
processedBins.add(processedBinsThisRun);
}
/**
* Lazily compress prior to logging a full version of a BIN; the caller
* is responsible for ensuring that a full version is likely to be logged
* next. Do not do any pruning. The target IN should be latched when we
* enter, and it will be remain latched.
*
* When an LN is deleted and a delta will be logged next (see
* BIN.shouldLogDelta), we do not add the slot to the compressor queue
* because compression must be deferred until the full version is logged.
* Therefore we cannot rely on the compressor to delete all slots and we do
* the final deferred compression here.
*
* Note that we do not bother to delete queue entries for the BIN if
* compression succeeds. Queue entries are normally removed quickly by the
* compressor. In the case where queue entries happen to exist when we do
* the final compression below, we rely on the compressor to clean them up
* later on when they are processed.
*/
public void lazyCompress(IN in) {
/* Only BINs are compressible. */
if (!in.isCompressible()) {
return;
}
final BIN bin = (BIN) in;
assert bin.isLatchOwnerForWrite();
/* Cursors prohibit compression. */
if (bin.nCursors() > 0) {
return;
}
/* Compress. Then if empty, queue for pruning. */
if (bin.compress(null /*localTracker*/)) {
if (bin.getNEntries() == 0) {
addBinToQueue(bin, false);
}
}
lazyProcessed++;
}
/*
* Find the db and bin for a BINReference.
* @return true if the db is open and the target bin is found.
*/
private boolean findDBAndBIN(BINSearch binSearch,
BINReference binRef,
DbTree dbTree,
Map<DatabaseId, DatabaseImpl> dbCache)
throws DatabaseException {
/*
* Find the database. Do not call releaseDb after this getDb, since
* the entire dbCache will be released later.
*/
binSearch.db = dbTree.getDb
(binRef.getDatabaseId(), lockTimeout, dbCache);
if ((binSearch.db == null) ||(binSearch.db.isDeleted())) {
/* The db was deleted. Ignore this BIN Ref. */
dbClosedBinsThisRun++;
return false;
}
/* Perform eviction before each operation. */
env.daemonEviction(true /*backgroundIO*/);
/* Find the BIN. */
binSearch.bin = searchForBIN(binSearch.db, binRef);
if ((binSearch.bin == null) ||
binSearch.bin.getNodeId() != binRef.getNodeId()) {
/* The BIN may have been split. */
if (binSearch.bin != null) {
binSearch.bin.releaseLatch();
}
splitBinsThisRun++;
return false;
}
return true;
}
/* Struct to return multiple values from findDBAndBIN. */
private static class BINSearch {
public DatabaseImpl db;
public BIN bin;
}
}