/**
* Copyright © 2011-2012 Akiban Technologies, Inc. All rights reserved.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Public License v1.0 which
* accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* This program may also be available under different license terms.
* For more information, see www.akiban.com or contact licensing@akiban.com.
*
* Contributors:
* Akiban Technologies, Inc.
*/
package com.persistit;
import static com.persistit.TransactionStatus.ABORTED;
import static com.persistit.TransactionStatus.PRIMORDIAL;
import static com.persistit.TransactionStatus.TIMED_OUT;
import static com.persistit.TransactionStatus.UNCOMMITTED;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantLock;
import com.persistit.Accumulator.Delta;
import com.persistit.exception.RetryException;
import com.persistit.exception.TimeoutException;
import com.persistit.mxbeans.TransactionIndexMXBean;
import com.persistit.util.Debug;
/**
* Keep track of concurrent transactions and those that committed or aborted
* recently.
*
* @author peter
*/
class TransactionIndex implements TransactionIndexMXBean {
/**
* Thread name of the polling task
*/
final static String POLLING_TASK_NAME = "TXN_UPDATE";
/**
* Interval in milliseconds for updating the active transaction cache
*/
final static long POLLING_TASK_INTERVAL = 10;
/**
* Default threshold value for moving long-running transactions to the
* {@link #_longRunning} list.
*/
final static int DEFAULT_LONG_RUNNING_THRESHOLD = 5;
/**
* Default maximum number of TransactionStatus instances to hold on the free
* list.
*/
final static int DEFAULT_MAX_FREE_LIST_SIZE = 20;
/**
* Default maximum number of Delta instances to hold on the free list.
*/
final static int DEFAULT_MAX_FREE_DELTA_LIST_SIZE = 50;
/**
* TODO - more thought on timeout processing.
*/
final static long VERY_LONG_TIMEOUT = 60000; // sixty seconds
/**
* Short timeout for lock polling
*/
final static long SHORT_TIMEOUT = 10;
/**
* Maximum length of path in deadlock detector before deadlock is assumed.
*/
final static int CYCLE_LIMIT = 10;
/**
* Initial size of arrays in ActiveTransactionCaches.
*/
private final static int INITIAL_ACTIVE_TRANSACTIONS_SIZE = 1000;
/**
* Maximum version handle "steps" within one transaction
*/
final static int VERSION_HANDLE_MULTIPLIER = 100;
/**
* The hash table.
*/
private final TransactionIndexBucket[] _hashTable;
/**
* Adjustable threshold count at which a transaction on the _current list is
* moved to the {@link #_longRunning} list so that the {@link #_floor} can
* be raised.
*/
volatile int _longRunningThreshold = DEFAULT_LONG_RUNNING_THRESHOLD;
/**
* Maximum number of {@link TransactionStatus} objects to hold on the free
* list. Once this number is reached any additional deallocated instances are
* released for garbage collection.
*/
volatile int _maxFreeListSize = DEFAULT_MAX_FREE_LIST_SIZE;
/**
* Maximum number of {@link Delta} instances to hold on the free list.
*/
volatile int _maxFreeDeltaListSize = DEFAULT_MAX_FREE_DELTA_LIST_SIZE;
/**
* One of two ActiveTransactionCache instances
*/
private final ActiveTransactionCache _atCache1;
/**
* One of two ActiveTransactionCache instances
*/
private final ActiveTransactionCache _atCache2;
/**
* Lock held by a thread updating the ActiveTransactionCache to prevent a
* race by another thread attempting also to update.
*/
private final ReentrantLock _atCacheLock = new ReentrantLock();
/**
* Reference to the more recently updated of two ActiveTransactionCache
* instances.
*/
private volatile ActiveTransactionCache _atCache;
/**
* Number of deadlocks reported by
* {@link #wwDependency(long, TransactionStatus, long)}; incremented each
* time {@link #isDeadlocked(TransactionStatus)} returns <code>true</code>
* there.
*/
private final AtomicLong _deadlockCounter = new AtomicLong();
/**
* Retry counter for accumulator snapshot computation. NOTE(review): not
* updated anywhere in the visible portion of this file; presumably
* incremented when a snapshot scan has to be retried - confirm.
*/
private final AtomicLong _accumulatorSnapshotRetryCounter = new AtomicLong();
/**
* Retry counter for accumulator checkpoint computation. NOTE(review): not
* updated anywhere in the visible portion of this file; presumably
* incremented when a checkpoint scan has to be retried - confirm.
*/
private final AtomicLong _accumulatorCheckpointRetryCounter = new AtomicLong();
/**
* The system-wide timestamp allocator
*/
private final TimestampAllocator _timestampAllocator;
/**
* The task that refreshes the active transaction cache. NOTE(review): the
* code that creates and starts this task is outside the visible portion of
* this file.
*/
private ActiveTransactionCachePollTask _activeTransactionCachePollTask;
/**
 * Task whose unit of work is to refresh the active transaction cache of the
 * enclosing TransactionIndex. The task asks to stop once {@link #close()}
 * has been called.
 */
class ActiveTransactionCachePollTask extends IOTaskRunnable {
    /**
     * Set to <code>true</code> by {@link #close()}; read by
     * {@link #shouldStop()}.
     */
    AtomicBoolean _closed = new AtomicBoolean();

    ActiveTransactionCachePollTask(final Persistit persistit) {
        super(persistit);
    }

    /**
     * Recompute the standby ActiveTransactionCache and put it in service.
     */
    @Override
    protected void runTask() throws Exception {
        updateActiveTransactionCache();
    }

    /**
     * @return <code>true</code> once {@link #close()} has been called
     */
    @Override
    protected boolean shouldStop() {
        return _closed.get();
    }

    /**
     * Mark this task as closed.
     */
    void close() {
        _closed.set(true);
    }
}
/**
* <p>
* Cached copy of currently active transactions. Instances of this class
* support the {@link TransactionIndex#hasConcurrentTransaction(long, long)}
* method. In general it is expensive to look at transaction status directly
* while pruning due to the need to lock each TransactionIndexBucket to read
* its lists. Instead of scanning all of these on each pruning operation we
* periodically compute an array of ts values for transactions that are
* currently running. These are assembled into a sorted array in this
* object. There are two instances of this class in the TransactionIndex,
* one used for concurrent pruning and the other available to recompute a
* more recent array of transactions.
* </p>
* <p>
* Any thread may call recompute on the ActiveTransactionCache that is not
* in service, but it must first lock the atCacheLock to prevent another
* from overwriting its work.
* </p>
* <p>
* Each time the cache is recomputed, this instance first gets the current
* timestamp t. Due to the write-ordering protocol, it is guaranteed that if
* a transaction having a start timestamp less than t is currently active,
* its entry will be in the hash table. Therefore, scanning the hash table
* will find every currently active transaction having a start timestamp
* less than t. Note that by the time the scan is done some of those
* transactions may have committed or aborted; therefore the set of
* transactions added to the cache may be a superset of those that are
* active at the conclusion of the scan, but that is okay. The result of
* that imprecision is that in some cases an MVV may not be optimally pruned
* until a later attempt.
* </p>
* <p>
* By the time this cache is read there may be newly registered transactions
* having start timestamps greater than t. Again, such a transaction may
* have registered and committed in the time since the scan was performed;
* nonetheless the {@link #hasConcurrentTransaction(long, long)} method will
* indicate that such a transaction is still active. Again, the result of
* that imprecision is that in some cases an MVV may not be optimally pruned
* until a later attempt.
* </p>
*
*/
class ActiveTransactionCache {
    /**
     * Timestamp obtained at the start of the most recent
     * {@link #recompute()}; the largest timestamp for which the current
     * contents of _tsArray are accurate.
     */
    private volatile long _ceiling;
    /**
     * Smallest timestamp found by the most recent {@link #recompute()}, or
     * the ceiling if none was found.
     */
    private volatile long _floor;
    /**
     * Start timestamps collected by the most recent scan; the first _count
     * elements are valid and sorted.
     */
    private volatile long[] _tsArray = new long[INITIAL_ACTIVE_TRANSACTIONS_SIZE];
    /**
     * Number of valid elements in _tsArray.
     */
    private volatile int _count;

    /**
     * Rebuild this cache by scanning every bucket that has a non-empty
     * current or long-running list. The ceiling and floor are published
     * only after the collected timestamps have been sorted.
     */
    void recompute() {
        _count = 0;
        final long scanTimestamp = _timestampAllocator.updateTimestamp();
        long lowest = scanTimestamp;
        for (final TransactionIndexBucket bucket : _hashTable) {
            if (bucket.getCurrent() == null && bucket.getLongRunning() == null) {
                // Nothing to collect from this bucket; skip locking it.
                continue;
            }
            bucket.lock();
            try {
                if (bucket.hasFloorMoved()) {
                    bucket.reduce();
                }
                lowest = collect(bucket.getCurrent(), scanTimestamp, lowest);
                lowest = collect(bucket.getLongRunning(), scanTimestamp, lowest);
            } finally {
                bucket.unlock();
            }
        }
        Arrays.sort(_tsArray, 0, _count);
        _ceiling = scanTimestamp;
        _floor = lowest;
    }

    /**
     * Walk one list of TransactionStatus instances and add the start
     * timestamp of each member that started at or before the scan timestamp
     * and has not been notified.
     *
     * @param head
     *            first member of the list, or <code>null</code>
     * @param scanTimestamp
     *            timestamp taken at the start of the scan
     * @param lowestSoFar
     *            smallest timestamp collected before this call
     * @return the smallest timestamp collected so far, including this list
     */
    private long collect(final TransactionStatus head, final long scanTimestamp, final long lowestSoFar) {
        long lowest = lowestSoFar;
        TransactionStatus status = head;
        while (status != null) {
            if (status.getTs() <= scanTimestamp && !status.isNotified()) {
                add(status.getTs());
                if (status.getTs() < lowest) {
                    lowest = status.getTs();
                }
            }
            status = status.getNext();
        }
        return lowest;
    }

    /**
     * Append a timestamp, growing the array by
     * INITIAL_ACTIVE_TRANSACTIONS_SIZE when the new count reaches its
     * length.
     */
    private void add(final long ts) {
        final int slot = _count;
        final int newCount = slot + 1;
        _count = newCount;
        if (newCount >= _tsArray.length) {
            _tsArray = Arrays.copyOf(_tsArray, newCount + INITIAL_ACTIVE_TRANSACTIONS_SIZE);
        }
        _tsArray[slot] = ts;
    }

    /**
     * Test for a cached start timestamp <code>ts</code> with
     * <code>ts1 &lt; ts</code> and <code>ts</code> not greater than
     * <code>ts2</code>. Any <code>ts2</code> beyond the ceiling is answered
     * <code>true</code> because the cache has no information about it.
     *
     * @return <code>true</code> if such a transaction may exist
     */
    boolean hasConcurrentTransaction(final long ts1, final long ts2) {
        if (ts2 > _ceiling) {
            return true;
        }
        if (ts1 > ts2 || ts2 < _floor) {
            return false;
        }
        /*
         * Note: we may consider a binary search here depending on the
         * length of this array.
         */
        int index = 0;
        while (index < _count) {
            final long candidate = _tsArray[index];
            if (candidate > ts2) {
                // Sorted array: every later element is larger still.
                break;
            }
            if (candidate > ts1) {
                return true;
            }
            index++;
        }
        return false;
    }

    @Override
    public String toString() {
        long lowest = Long.MAX_VALUE;
        long highest = Long.MIN_VALUE;
        for (int index = 0; index < _count; index++) {
            final long ts = _tsArray[index];
            if (ts < lowest) {
                lowest = ts;
            }
            if (ts > highest) {
                highest = ts;
            }
        }
        return String.format("Floor=%,d Ceiling=%,d Low=%s High=%s Count=%,d", _floor, _ceiling,
                minMaxString(lowest), minMaxString(highest), _count);
    }
}
/**
 * Extract the timestamp from a version handle by dividing out
 * {@link #VERSION_HANDLE_MULTIPLIER}.
 *
 * @param versionHandle
 *            the version handle
 * @return the timestamp part of the handle
 */
public static long vh2ts(final long versionHandle) {
    final long timestamp = versionHandle / VERSION_HANDLE_MULTIPLIER;
    return timestamp;
}
/**
 * Convert a timestamp to the version handle for step zero by multiplying by
 * {@link #VERSION_HANDLE_MULTIPLIER}.
 *
 * @param ts
 *            the timestamp
 * @return the corresponding version handle
 */
public static long ts2vh(final long ts) {
    final long versionHandle = ts * VERSION_HANDLE_MULTIPLIER;
    return versionHandle;
}
/**
 * Combine a timestamp and a step number into a version handle.
 *
 * @param timestamp
 *            the timestamp
 * @param step
 *            the step number, added to the scaled timestamp
 * @return the version handle
 */
public static long tss2vh(final long timestamp, final int step) {
    final long stepZeroHandle = ts2vh(timestamp);
    return stepZeroHandle + step;
}
/**
 * Extract the step number from a version handle: the remainder after
 * dividing by {@link #VERSION_HANDLE_MULTIPLIER}.
 *
 * @param versionHandle
 *            the version handle
 * @return the step part of the handle
 */
public static int vh2step(final long versionHandle) {
    final long remainder = versionHandle % VERSION_HANDLE_MULTIPLIER;
    return (int) remainder;
}
TransactionIndex(final TimestampAllocator timestampAllocator, final int hashTableSize) {
_timestampAllocator = timestampAllocator;
_hashTable = new TransactionIndexBucket[hashTableSize];
for (int hashIndex = 0; hashIndex < hashTableSize; hashIndex++) {
_hashTable[hashIndex] = new TransactionIndexBucket(this, hashIndex);
}
_atCache1 = new ActiveTransactionCache();
_atCache2 = new ActiveTransactionCache();
_atCache = _atCache1;
}
/**
 * @return number of buckets in the hash table
 */
int getHashTableSize() {
    return this._hashTable.length;
}

/**
 * @return maximum number of TransactionStatus instances held on a free list
 */
int getMaxFreeListSize() {
    return this._maxFreeListSize;
}

/**
 * @return maximum number of Delta instances held on a free list
 */
int getMaxFreeDeltaListSize() {
    return this._maxFreeDeltaListSize;
}

/**
 * @return the current long-running threshold count
 */
int getLongRunningThreshold() {
    return this._longRunningThreshold;
}

/**
 * @return the TimestampAllocator supplied to the constructor
 */
TimestampAllocator getTimestampAllocator() {
    return this._timestampAllocator;
}
/**
* Given a start timestamp <code>ts</code> of the current transaction,
* determine the commit status of a value at the specified
* <code>versionHandle</code>. The result depends on the status of the
* transaction T identified by the <code>versionHandle</code> as follows:
* <ul>
* <li>If T's start timestamp is primordial (0), return
* {@link TransactionStatus#PRIMORDIAL}.</li>
* <li>If T is the same transaction as this one (the transaction identified
* by <code>ts</code>) then the result depends on the relationship between
* the "step" number encoded in the supplied <code>versionHandle</code>
* (stepv) and the supplied <code>step</code> parameter:
* <ul>
* <li>If stepv &lt;= step then return tsv as the "commit" timestamp. (Note
* that the transaction has not actually committed, but for the purpose of
* reading values during the execution of the transaction it is as if that
* transaction's own updates are present.)</li>
* <li>Else return {@link TransactionStatus#UNCOMMITTED}.</li>
* </ul>
* </li>
* <li>If T started after <code>ts</code>, return
* {@link TransactionStatus#UNCOMMITTED}.</li>
* <li>If T has committed, the result depends on the start timestamp
* <code>ts</code> of the current transaction: if T committed before
* <code>ts</code> the result is T's commit timestamp <code>tc</code>,
* otherwise it is {@link TransactionStatus#UNCOMMITTED}.</li>
* <li>If T has aborted, the result is {@link TransactionStatus#ABORTED}.</li>
* <li>If T has not requested to commit, i.e., does not have proposal
* timestamp <code>tp</code>, or that proposal timestamp is greater than
* <code>ts</code> the result is {@link TransactionStatus#UNCOMMITTED}.</li>
* <li>If T has requested to commit and its proposal timestamp is less than
* <code>ts</code>, but has neither completed nor failed yet, then this
* method waits until T's commit status has been determined.</li>
* <li>If no TransactionStatus can be found for T, its start timestamp is
* returned as the imputed commit timestamp.</li>
* </ul>
*
* @param versionHandle
*            the version handle of a value version
* @param ts
*            the transaction start timestamp of the current transaction
* @param step
*            Step number within the current transaction.
* @return the commit status
* @throws InterruptedException
*             if the waiting thread is interrupted
* @throws TimeoutException
*             if the thread waited a very long time without resolving the
*             commit status; this signifies a serious software error.
*             NOTE(review): the wait loop in this method has no overall
*             deadline of its own - confirm where this is raised.
*/
long commitStatus(final long versionHandle, final long ts, final int step) throws InterruptedException,
TimeoutException {
final long tsv = vh2ts(versionHandle);
if (tsv == PRIMORDIAL) {
return PRIMORDIAL;
}
if (tsv == ts) {
/*
* The update was created by this transaction. Policy is that if the
* version was written by an earlier or equal step, return a valid
* commit timestamp (even though that version has not yet been
* committed). Otherwise return UNCOMMITTED to prevent it from being
* read.
*/
final int stepv = vh2step(versionHandle);
if (stepv <= step) {
return tsv;
} else {
return UNCOMMITTED;
}
} else {
/*
* If the version's start timestamp is greater than current
* transaction's timestamp, then it cannot have committed in time to
* be visible.
*/
if (tsv > ts) {
return UNCOMMITTED;
}
}
final int hashIndex = hashIndex(tsv);
final TransactionIndexBucket bucket = _hashTable[hashIndex];
/*
* First check whether there are any TransactionStatus instances in the
* bucket. In a (hopefully) common case where there are none, then we
* can assume the value did commit, and do so without locking the
* bucket.
*
* We can read these members without locking because (a) they are
* volatile, and (b) write-ordering guarantees that a TransactionStatus
* that is being moved to either the aborted or long-running list will
* be added to the new list before being removed from the current list.
* These values are all visible to us with respect to a particular tsv
* because we could not have seen the tsv without its corresponding
* transaction status having been registered.
*/
if ((bucket.getCurrent() == null || tsv < bucket.getFloor()) && bucket.getLongRunning() == null
&& bucket.getAborted() == null) {
return tsv;
}
/*
* Default result, used when no TransactionStatus is found for tsv or
* when the one found is reallocated to another transaction while we
* are examining it.
*/
final long commitTimestamp = tsv;
/*
* There were members on at least one of the lists so we need to try to
* find the corresponding TransactionStatus identified by tsv.
*/
final TransactionStatus status = getStatus(tsv);
/*
* The result can be null in the event the TransactionStatus was freed.
* It could only have been freed if its transaction committed at a tc
* that is now primordial. Therefore if status is null we can return tsv
* as the imputed tc value.
*/
if (status != null) {
/*
* Found the TransactionStatus identified by tsv, but by the time we
* read its tc, that TransactionStatus may already be committed to a
* new transaction with a different ts. Therefore we re-check its ts
* on every iteration and briefly lock it to get an accurate
* reading.
*
* If the TransactionStatus was concurrently freed and reallocated
* to a different transaction, then it must have committed before
* the floor timestamp.
*/
long tc = status.getTc();
while (status.getTs() == tsv) {
if (tc >= ts) {
return UNCOMMITTED;
}
if (tc >= 0) {
return tc;
}
if (tc == ABORTED) {
return tc;
}
/*
* Waiting for status to resolve. To do this, lock, unlock and
* then retry. If the lock is not obtained within SHORT_TIMEOUT
* the loop simply re-tests the previously read tc.
*/
if (status.wwLock(SHORT_TIMEOUT)) {
tc = status.getTc();
status.wwUnlock();
}
}
}
return commitTimestamp;
}
/**
 * Atomically assign a start timestamp and register a transaction within the
 * <code>TransactionIndex</code>. Once registered, the transaction's commit
 * status can be found by calling {@link #commitStatus(long, long, int)}. It
 * is important that assigning the timestamp and making the transaction
 * accessible within the TransactionIndex is atomic because otherwise a
 * concurrent transaction with a larger start timestamp could fail to see
 * this one and cause inconsistent results.
 *
 * @return the TransactionStatus.
 * @throws InterruptedException
 * @throws TimeoutException
 */
TransactionStatus registerTransaction() throws TimeoutException, InterruptedException {
    final boolean forCheckpoint = false;
    return registerTransaction(forCheckpoint);
}
/**
 * Atomically assign a timestamp as described by
 * {@link #registerTransaction()} AND atomically assign that timestamp as a
 * new checkpoint timestamp. This method should be called only by
 * {@link CheckpointManager#createCheckpoint()}.
 *
 * @return the TransactionStatus.
 * @throws InterruptedException
 * @throws TimeoutException
 */
TransactionStatus registerCheckpointTransaction() throws TimeoutException, InterruptedException {
    final boolean forCheckpoint = true;
    return registerTransaction(forCheckpoint);
}
/**
 * Allocate a timestamp, register a TransactionStatus for it in the
 * appropriate bucket, and acquire that status's wwLock on behalf of the
 * caller. Timestamp allocation and registration happen together while
 * synchronized on this TransactionIndex.
 *
 * @param forCheckpoint
 *            <code>true</code> to allocate the timestamp as a checkpoint
 *            timestamp
 * @return the registered TransactionStatus
 * @throws InterruptedException
 *             if interrupted; in that case the status is aborted and
 *             completed before the exception propagates
 * @throws TimeoutException
 * @throws IllegalStateException
 *             if the wwLock of the new status cannot be acquired
 */
private TransactionStatus registerTransaction(final boolean forCheckpoint) throws TimeoutException,
        InterruptedException {
    Debug.suspend();
    final TransactionStatus registered;
    final TransactionIndexBucket home;
    synchronized (this) {
        final long startTimestamp = forCheckpoint ? _timestampAllocator.allocateCheckpointTimestamp()
                : _timestampAllocator.updateTimestamp();
        home = _hashTable[hashIndex(startTimestamp)];
        home.lock();
        try {
            registered = home.allocateTransactionStatus();
            registered.initialize(startTimestamp);
            home.addCurrent(registered);
        } finally {
            home.unlock();
        }
    }
    try {
        /*
         * The TransactionStatus is locked for the entire duration of the
         * running transaction. The following call should always succeed
         * immediately; a failure here signifies a software failure or a
         * thread terminated by {@link Thread#stop()} somewhere else.
         */
        final boolean locked = registered.wwLock(VERY_LONG_TIMEOUT);
        if (!locked) {
            throw new IllegalStateException("wwLock was unavailable on newly allocated TransactionStatus");
        }
        /*
         * General hygiene - call reduce if the current count is bigger than
         * the threshold - but this is merely an optimization and the test
         * does not need to be synchronized.
         */
        final boolean overThreshold = home.getCurrentCount() > _longRunningThreshold;
        if (overThreshold) {
            home.lock();
            try {
                home.reduce();
            } finally {
                home.unlock();
            }
        }
    } catch (final InterruptedException ie) {
        registered.abort();
        registered.complete(0);
        throw ie;
    }
    return registered;
}
/**
 * Notify the TransactionIndex that the specified Transaction has committed
 * or aborted. This method allows the TransactionIndex to awaken any threads
 * waiting for resolution of commit status or a write-write dependency. The
 * work is delegated to the bucket selected by the status's start timestamp,
 * while holding that bucket's lock.
 *
 * @param status
 *            the TransactionStatus that has committed or aborted.
 * @param timestamp
 *            the timestamp to post as the transaction's commit timestamp
 */
void notifyCompleted(final TransactionStatus status, final long timestamp) {
    final TransactionIndexBucket owner = _hashTable[hashIndex(status.getTs())];
    owner.lock();
    try {
        owner.notifyCompleted(status, timestamp);
    } finally {
        owner.unlock();
    }
}
/**
 * <p>
 * Detect whether there exists a registered transaction that has neither
 * committed nor aborted having a starting timestamp <code>ts</code> such
 * that <code>ts1</code> &lt; <code>ts</code> &lt; <code>ts2</code>.
 * </p>
 * <p>
 * This method is not synchronized and therefore may return a
 * <code>true</code> value for a transaction which then immediately before
 * the caller acts on the result value either commits or aborts. However,
 * the converse is not true. Provided <code>ts2</code> is a valid timestamp
 * value created by the <code>TimestampAllocator</code>, then if there
 * exists a concurrent transaction with a start timestamp in the specified
 * range, this method is guaranteed to return <code>true</code>.
 * </p>
 *
 * @param ts1
 *            first timestamp
 * @param ts2
 *            second timestamp
 * @return <code>true</code> if there exists a concurrent transaction that
 *         started between ts1 and ts2
 */
public boolean hasConcurrentTransaction(final long ts1, final long ts2) {
    // Answer from whichever cache instance is currently in service.
    final ActiveTransactionCache inService = _atCache;
    return inService.hasConcurrentTransaction(ts1, ts2);
}
/*
 * (non-Javadoc)
 *
 * @see com.persistit.TransactionIndexMXBean#getActiveTransactionFloor()
 */
@Override
public long getActiveTransactionFloor() {
    final ActiveTransactionCache inService = _atCache;
    return inService._floor;
}

/*
 * (non-Javadoc)
 *
 * @see com.persistit.TransactionIndexMXBean#getActiveTransactionCeiling()
 */
@Override
public long getActiveTransactionCeiling() {
    final ActiveTransactionCache inService = _atCache;
    return inService._ceiling;
}

/*
 * (non-Javadoc)
 *
 * @see com.persistit.TransactionIndexMXBean#getActiveTransactionCount()
 */
@Override
public long getActiveTransactionCount() {
    final ActiveTransactionCache inService = _atCache;
    return inService._count;
}

/**
 * @return current ActiveTransactionCache instance
 */
ActiveTransactionCache getActiveTransactionCache() {
    return this._atCache;
}
/**
 * Find the TransactionStatus registered for the supplied start timestamp.
 *
 * @param tsv
 *            start timestamp of the transaction of interest
 * @return the matching TransactionStatus, or <code>null</code> if
 *         <code>tsv</code> is primordial or no matching instance is on any
 *         of the bucket's lists
 */
TransactionStatus getStatus(final long tsv) {
    if (tsv == PRIMORDIAL) {
        return null;
    }
    final TransactionIndexBucket bucket = _hashTable[hashIndex(tsv)];
    /*
     * First check whether there are any TransactionStatus instances in the
     * bucket. If not then the transaction that committed this value is not
     * concurrent.
     *
     * We can read these members without locking because (a) they are
     * volatile, and (b) write-ordering guarantees that a TransactionStatus
     * that is being moved to either the aborted or long-running list will
     * be added to the new list before being removed from the current list.
     * These values are all visible to us with respect to a particular tsv
     * because we could not have seen the tsv without its corresponding
     * transaction status having been registered.
     *
     * Note: if tsv >= floor it is not sufficient to look only in current.
     * This is because the TransactionIndexBucket#reduce() method moves the
     * TransactionStatus to longRunning or aborted before it changes the
     * floor. But the converse is okay: if tsv < floor, then the
     * TransactionStatus must be in longRunning or aborted if present at
     * all.
     *
     * The final re-read of the floor ensures it was stable while the lists
     * were examined; otherwise fall through, lock and search safely. Tests
     * show the floor is almost always stable, but there are very
     * occasional misses.
     */
    final long floor = bucket.getFloor();
    if ((tsv < floor || bucket.getCurrent() == null) && bucket.getLongRunning() == null
            && bucket.getAborted() == null && floor == bucket.getFloor()) {
        return null;
    }
    /*
     * There were members on at least one of the lists. Need to lock the
     * bucket so we can traverse the lists.
     */
    bucket.lock();
    try {
        TransactionStatus found = null;
        /*
         * A transaction with a start timestamp below the floor is committed
         * unless it is found on either the aborted or longRunning lists, so
         * the current list is searched only when tsv is at or above the
         * floor.
         */
        if (tsv >= bucket.getFloor()) {
            found = scanListForTs(bucket.getCurrent(), tsv);
        }
        if (found == null) {
            found = scanListForTs(bucket.getAborted(), tsv);
        }
        if (found == null) {
            found = scanListForTs(bucket.getLongRunning(), tsv);
        }
        return found;
    } finally {
        bucket.unlock();
    }
}

/**
 * Walk a linked list of TransactionStatus instances looking for one whose
 * start timestamp equals <code>tsv</code>.
 *
 * @return the first match, or <code>null</code> if there is none
 */
private static TransactionStatus scanListForTs(final TransactionStatus head, final long tsv) {
    TransactionStatus candidate = head;
    while (candidate != null) {
        if (candidate.getTs() == tsv) {
            return candidate;
        }
        candidate = candidate.getNext();
    }
    return null;
}
/**
 * <p>
 * Detects a write-write dependency from one transaction to another. This
 * method is called when transaction having start timestamp <code>ts</code>
 * detects that there is already an update from another transaction, the
 * <i>target</i> transaction, identified by its <code>versionHandle</code>.
 * If the target has already committed or aborted then this method
 * immediately returns a value depending on its outcome:
 * <ul>
 * <li>If the target is concurrent with this transaction and committed, then
 * this method returns its commit timestamp, indicating that this
 * transaction must abort.</li>
 * <li>If the target aborted or committed before this transaction started,
 * then this method returns 0 meaning that the write-write dependency has
 * been cleared and this transaction may proceed.</li>
 * <li>If the target identified by the <code>versionHandle</code> is the
 * same as the current transaction, then this method returns 0.</li>
 * <li>If the target already depends on this transaction, then a dependency
 * cycle causing a deadlock is detected and this method returns
 * {@link TransactionStatus#UNCOMMITTED} to indicate that the target
 * transaction is concurrent and this transaction should abort.</li>
 * </ul>
 * If the target is concurrent but has neither committed nor aborted, then
 * this method waits up to <code>timeout</code> milliseconds for the
 * target's status to be resolved. If the timeout expires without
 * resolution, this method returns {@link TransactionStatus#TIMED_OUT}.
 * </p>
 * <p>
 * Design note: this method is called when a transaction intending to add a
 * new version discovers there is already an MVV for the same key. The
 * transaction is required to determine whether any of the other versions in
 * that MVV are from concurrent transactions, and to abort if so. We expect
 * this method to be called with a timeout of zero to perform a fast,
 * usually non-conflicting, outcome when the page holding the MVV is
 * latched. The TIMED_OUT return value indicates that the caller must back
 * off the latch, reevaluate the wwDependency with no locks, and then retry.
 * </p>
 * <p>
 * Elapsed time is measured with {@link System#nanoTime()} so that
 * adjustments to the wall clock can neither cut the wait short nor extend
 * it.
 * </p>
 *
 * @param versionHandle
 *            versionHandle of a value version found in an MVV that the
 *            current transaction intends to update
 * @param source
 *            this transaction's <code>TransactionStatus</code>
 * @param timeout
 *            Time in milliseconds to wait. If the other transaction has
 *            neither committed nor aborted within this time interval then
 *            {@link TransactionStatus#TIMED_OUT} is returned.
 * @return commit status of the target transaction
 * @throws InterruptedException
 *             if the waiting thread is interrupted
 * @throws IllegalStateException
 *             if the target's lock was acquired but its commit timestamp
 *             is still unresolved
 */
long wwDependency(final long versionHandle, final TransactionStatus source, final long timeout)
        throws InterruptedException, IllegalArgumentException {
    final long tsv = vh2ts(versionHandle);
    if (tsv == source.getTs()) {
        /*
         * Same transaction
         */
        return 0;
    }
    final TransactionStatus target = getStatus(tsv);
    if (target == null) {
        /*
         * Target is gone
         */
        return 0;
    }
    /*
     * Read tc before verifying the identity of the target so that a tc
     * value is only acted upon if the status still belonged to tsv after
     * the read.
     */
    final long tcommit = target.getTc();
    if (target.getTs() != tsv) {
        /*
         * By the time the selected TransactionStatus has been found, it may
         * already be allocated to another transaction. If that's true the
         * original transaction must have committed. The same identity check
         * is repeated inside the loop below each time the target's lock is
         * acquired.
         */
        return 0;
    }
    if (tcommit > 0 && tcommit < source.getTs() || tcommit == ABORTED) {
        /*
         * Target committed and is not concurrent or it aborted
         */
        return 0;
    }
    final long nanosPerMilli = 1000000L;
    final long startNanos = System.nanoTime();
    /*
     * Blocks until the target transaction finishes, either by committing or
     * aborting, or until the timeout expires.
     */
    do {
        try {
            /*
             * Link to target transaction, then test for deadlock. Abort
             * immediately
             */
            source.setDepends(target);
            if (isDeadlocked(source)) {
                _deadlockCounter.incrementAndGet();
                return UNCOMMITTED;
            }
            if (target.wwLock(Math.min(timeout, SHORT_TIMEOUT))) {
                try {
                    if (target.getTs() != tsv) {
                        return 0;
                    }
                    final long tc = target.getTc();
                    if (tc == ABORTED) {
                        return 0;
                    }
                    /*
                     * The following is true because the target's wwLock was
                     * acquired, which means it has either aborted or
                     * committed.
                     */
                    if (tc < 0 || tc == UNCOMMITTED) {
                        throw new IllegalStateException("Commit incomplete");
                    }
                    /*
                     * true if and only if this is a concurrent transaction
                     */
                    if (tc > source.getTs()) {
                        return tc;
                    } else {
                        return 0;
                    }
                } finally {
                    target.wwUnlock();
                }
            } else {
                if (timeout == 0) {
                    return TIMED_OUT;
                }
                if (isDeadlocked(source)) {
                    _deadlockCounter.incrementAndGet();
                    return UNCOMMITTED;
                }
            }
        } finally {
            /*
             * Always clear the dependency link so that it exists only while
             * this thread is actually waiting on the target.
             */
            source.setDepends(null);
        }
    } while (timeout > 0 && (System.nanoTime() - startNanos) / nanosPerMilli < timeout);
    return TIMED_OUT;
}
/**
 * Follow the chain of dependencies starting at <code>source</code> to
 * decide whether it leads back to <code>source</code>.
 *
 * @param source
 *            the TransactionStatus at which the search starts
 * @return <code>false</code> if the chain ends or reaches an aborted
 *         transaction; <code>true</code> if it returns to
 *         <code>source</code> or is still unresolved after
 *         {@link #CYCLE_LIMIT} links, in which case a deadlock is assumed
 */
boolean isDeadlocked(final TransactionStatus source) {
    TransactionStatus cursor = source;
    int hops = 0;
    while (hops < CYCLE_LIMIT) {
        cursor = cursor.getDepends();
        if (cursor == null || cursor.getTc() == ABORTED) {
            return false;
        }
        if (cursor == source) {
            return true;
        }
        hops++;
    }
    return true;
}
/**
 * Atomically decrement the MVV count for the aborted
 * <code>TransactionStatus</code> identified by the supplied version handle.
 *
 * @param versionHandle
 *            version handle whose timestamp identifies the transaction
 * @return The resulting count
 * @throws IllegalArgumentException
 *             if the supplied <code>versionHandle</code> does not identify
 *             an aborted transaction.
 */
long decrementMvvCount(final long versionHandle) {
    final long tsv = vh2ts(versionHandle);
    final TransactionStatus status = getStatus(tsv);
    final boolean identifiesAborted = status != null && status.getTs() == tsv && status.getTc() == ABORTED;
    if (!identifiesAborted) {
        throw new IllegalArgumentException("No such aborted transaction " + versionHandle);
    }
    return status.decrementMvvCount();
}
/**
 * Compute hash index for a given timestamp: the two 32-bit halves are
 * XOR-ed, the sign bit is cleared, and the result is reduced modulo the
 * hash table length.
 *
 * @param ts
 *            the timestamp
 * @return the hash table index
 */
private int hashIndex(final long ts) {
    final int lowBits = (int) ts;
    final int highBits = (int) (ts >>> 32);
    final int nonNegative = (lowBits ^ highBits) & Integer.MAX_VALUE;
    return nonNegative % _hashTable.length;
}
/**
 * Add a TransactionStatus in the ABORTED state to the appropriate bucket.
 * This method is called during recovery processing. The status is
 * allocated, initialized as aborted and placed on the bucket's aborted list
 * while synchronized on this TransactionIndex and holding the bucket lock.
 *
 * @param ts
 *            start timestamp of the aborted transaction
 * @throws InterruptedException
 */
void injectAbortedTransaction(final long ts) throws InterruptedException {
    synchronized (this) {
        final TransactionIndexBucket owner = _hashTable[hashIndex(ts)];
        owner.lock();
        try {
            final TransactionStatus aborted = owner.allocateTransactionStatus();
            aborted.initializeAsAborted(ts);
            owner.addAborted(aborted);
        } finally {
            owner.unlock();
        }
    }
}
/*
 * (non-Javadoc)
 *
 * @see com.persistit.TransactionIndexMXBean#updateActiveTransactionCache()
 */
@Override
public void updateActiveTransactionCache() {
    Debug.suspend();
    _atCacheLock.lock();
    try {
        // Recompute whichever instance is not in service, then swap it in.
        final ActiveTransactionCache standby;
        if (_atCache == _atCache1) {
            standby = _atCache2;
        } else {
            standby = _atCache1;
        }
        standby.recompute();
        _atCache = standby;
    } finally {
        _atCacheLock.unlock();
    }
}
/*
 * (non-Javadoc)
 *
 * @see com.persistit.TransactionIndexMXBean#cleanup()
 */
@Override
public void cleanup() {
    updateActiveTransactionCache();
    // Reduce every bucket in turn, holding only that bucket's lock.
    for (int slot = 0; slot < _hashTable.length; slot++) {
        final TransactionIndexBucket bucket = _hashTable[slot];
        bucket.lock();
        try {
            bucket.reduce();
        } finally {
            bucket.unlock();
        }
    }
}
/**
 * Clear the MVV count for all aborted TransactionStatus instances that
 * started before the specified timestamp. This method may be called by a
 * utility program such as IntegrityCheck that has verified the
 * non-existence of relevant MVV values across the entire database.
 *
 * @param timestamp
 *            the timestamp passed to each bucket
 * @return Count of TransactionStatus instances affected, summed over all
 *         buckets
 */
int resetMVVCounts(final long timestamp) {
    int affected = 0;
    for (int slot = 0; slot < _hashTable.length; slot++) {
        final TransactionIndexBucket bucket = _hashTable[slot];
        bucket.lock();
        try {
            affected += bucket.resetMVVCounts(timestamp);
        } finally {
            bucket.unlock();
        }
    }
    return affected;
}
/**
 * Compute and return the snapshot value of an Accumulator. Starting from
 * <code>initialValue</code>, the snapshot contribution of each bucket is
 * folded in with {@link Accumulator#applyValue}. A bucket whose computation
 * raises a RetryException is processed again until it succeeds.
 *
 * @param accumulator
 *            the Accumulator
 * @param timestamp
 *            timestamp passed to each bucket
 * @param step
 *            step passed to each bucket
 * @param initialValue
 *            value to which the bucket contributions are applied
 * @return the snapshot value
 * @throws InterruptedException
 */
long getAccumulatorSnapshot(final Accumulator accumulator, final long timestamp, final int step,
        final long initialValue) throws InterruptedException {
    long snapshot = initialValue;
    for (final TransactionIndexBucket bucket : _hashTable) {
        boolean retry;
        do {
            retry = false;
            bucket.lock();
            try {
                final long contribution = bucket.getAccumulatorSnapshot(accumulator, timestamp, step);
                snapshot = accumulator.applyValue(snapshot, contribution);
            } catch (final RetryException e) {
                retry = true;
            } finally {
                bucket.unlock();
            }
        } while (retry);
    }
    return snapshot;
}
/**
* <p>
* Compute a snapshot value for each of the supplied Accumulators and store
* the resulting value in the Accumulator's checkpointValue field. This
* method performs the same function as calling
* {@link #getAccumulatorSnapshot(Accumulator, long, int, long)} on each
* Accumulator, but is more efficient because it visits each bucket once
* rather than once per Accumulator.
* </p>
* <p>
* This method is sensitive to the transactional context in which it is
* called. It is intended to be called only within the Transaction context
* created during the {@link CheckpointManager#createCheckpoint()} method.
* </p>
*
* @param timestamp
* checkpoint timestamp
* @param accumulators
* List of Accumulators that need to be check-pointed
* @throws InterruptedException
*/
void checkpointAccumulatorSnapshots(final long timestamp, final List<Accumulator> accumulators)
        throws InterruptedException {
    // Seed each Accumulator's checkpoint value from its base value. The
    // timestamp is set to Long.MIN_VALUE here and replaced with the real
    // checkpoint timestamp as buckets are folded in below.
    for (final Accumulator accumulator : accumulators) {
        accumulator.setCheckpointValueAndTimestamp(accumulator.getBaseValue(), Long.MIN_VALUE);
    }
    for (final TransactionIndexBucket bucket : _hashTable) {
        boolean again = true;
        while (again) {
            again = false;
            bucket.lock();
            try {
                // Re-initialize the temporary value on every attempt so that a
                // retry does not start from a partially updated value.
                for (final Accumulator accumulator : accumulators) {
                    accumulator.setCheckpointTemp(accumulator.getBucketValue(bucket.getIndex()));
                }
                // NOTE(review): presumably this updates each Accumulator's
                // checkpointTemp from the bucket's contents; the implementation
                // is not visible here.
                bucket.checkpointAccumulatorSnapshots(timestamp);
                // Only reached when the bucket call returned normally: combine
                // the temporary value into the checkpoint value.
                for (final Accumulator accumulator : accumulators) {
                    accumulator.setCheckpointValueAndTimestamp(
                            accumulator.applyValue(accumulator.getCheckpointValue(),
                                    accumulator.getCheckpointTemp()), timestamp);
                }
            } catch (final RetryException e) {
                // Checkpoint values have not been modified for this bucket yet;
                // unlock and repeat the same bucket.
                again = true;
            } finally {
                bucket.unlock();
            }
        }
    }
}
/**
* Create and return a brand new delta associated with the given status.
* Note that it is completely uninitialized and always allocated from the
* bucket.
*
* @param status
* Status to add the delta to.
* @return The new Delta.
*/
Delta addDelta(final TransactionStatus status) {
    // The bucket is selected by hashing the status's start timestamp.
    final TransactionIndexBucket owner = _hashTable[hashIndex(status.getTs())];
    owner.lock();
    try {
        // Allocation and attachment both happen while the bucket lock is held.
        final Delta fresh = owner.allocateDelta();
        status.addDelta(fresh);
        return fresh;
    } finally {
        owner.unlock();
    }
}
/**
* Create, or combine, new delta information for the given status. This
* method attempts to find a compatible delta (see
* {@link Delta#canMerge(Accumulator, int)}) to combine with before
* allocating a new one. If one is not found, a new Delta is obtained from
* {@link #addDelta(TransactionStatus)} and initialized before
* returning.
*
* @param status
* Status to add, or combine, delta to.
* @param accumulator
* Accumulator being modified.
* @param step
* Step value of modification.
* @param value
* The value to add or combine.
*
* @return Delta that was created or modified.
*/
Delta addOrCombineDelta(final TransactionStatus status, final Accumulator accumulator, final int step,
        final long value) {
    // Check current deltas, no lock as status is single txn/thread
    for (Delta delta = status.getDelta(); delta != null; delta = delta.getNext()) {
        if (delta.canMerge(accumulator, step)) {
            delta.merge(value);
            // Return the delta that absorbed the value. Returning null here
            // would violate the contract that the created or modified Delta
            // is always returned.
            return delta;
        }
    }
    // No compatible existing delta, create a new one and initialize it
    final Delta created = addDelta(status);
    created.setAccumulator(accumulator);
    created.setStep(step);
    created.setValue(value);
    return created;
}
/*
* (non-Javadoc)
*
* @see com.persistit.mxbeans.TransactionIndexMXBean#getCurrentCount()
*/
@Override
public int getCurrentCount() {
    // Totals the value reported by getCurrentCount() of every bucket,
    // locking each bucket only while it is being read.
    int total = 0;
    final TransactionIndexBucket[] buckets = _hashTable;
    for (int slot = 0; slot < buckets.length; slot++) {
        final TransactionIndexBucket current = buckets[slot];
        current.lock();
        try {
            total += current.getCurrentCount();
        } finally {
            current.unlock();
        }
    }
    return total;
}
/*
* (non-Javadoc)
*
* @see com.persistit.mxbeans.TransactionIndexMXBean#getLongRunningCount()
*/
@Override
public int getLongRunningCount() {
    // Totals the value reported by getLongRunningCount() of every bucket,
    // locking each bucket only while it is being read.
    int total = 0;
    final TransactionIndexBucket[] buckets = _hashTable;
    for (int slot = 0; slot < buckets.length; slot++) {
        final TransactionIndexBucket current = buckets[slot];
        current.lock();
        try {
            total += current.getLongRunningCount();
        } finally {
            current.unlock();
        }
    }
    return total;
}
/*
* (non-Javadoc)
*
* @see com.persistit.mxbeans.TransactionIndexMXBean#getAbortedCount()
*/
@Override
public int getAbortedCount() {
    // Totals the value reported by getAbortedCount() of every bucket,
    // locking each bucket only while it is being read.
    int total = 0;
    final TransactionIndexBucket[] buckets = _hashTable;
    for (int slot = 0; slot < buckets.length; slot++) {
        final TransactionIndexBucket current = buckets[slot];
        current.lock();
        try {
            total += current.getAbortedCount();
        } finally {
            current.unlock();
        }
    }
    return total;
}
/*
* (non-Javadoc)
*
* @see com.persistit.mxbeans.TransactionIndexMXBean#getFreeCount()
*/
@Override
public int getFreeCount() {
    // Totals the value reported by getFreeCount() of every bucket,
    // locking each bucket only while it is being read.
    int total = 0;
    final TransactionIndexBucket[] buckets = _hashTable;
    for (int slot = 0; slot < buckets.length; slot++) {
        final TransactionIndexBucket current = buckets[slot];
        current.lock();
        try {
            total += current.getFreeCount();
        } finally {
            current.unlock();
        }
    }
    return total;
}
/*
* (non-Javadoc)
*
* @see com.persistit.mxbeans.TransactionIndexMXBean#getDroppedCount()
*/
@Override
public int getDroppedCount() {
    // Totals the value reported by getDroppedCount() of every bucket,
    // locking each bucket only while it is being read.
    int total = 0;
    final TransactionIndexBucket[] buckets = _hashTable;
    for (int slot = 0; slot < buckets.length; slot++) {
        final TransactionIndexBucket current = buckets[slot];
        current.lock();
        try {
            total += current.getDroppedCount();
        } finally {
            current.unlock();
        }
    }
    return total;
}
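/**
* Return the start timestamps of the oldest <code>max</code> transactions
* currently running.
*
* @param max
*            maximum number of timestamps to return
* @return an array holding at most <code>max</code> timestamps copied from
*         the active transaction cache, or <code>null</code> if the cache
*         was replaced during each of the 10 attempts to copy it
*/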
long[] oldestTransactions(final int max) {
    // Scratch buffer large enough for any copy length used below, since the
    // copy length never exceeds max.
    final long[] scratch = new long[Math.max(max, INITIAL_ACTIVE_TRANSACTIONS_SIZE)];
    for (int attempt = 0; attempt < 10; attempt++) {
        final ActiveTransactionCache observed = getActiveTransactionCache();
        final int copied = Math.min(max, observed._count);
        System.arraycopy(observed._tsArray, 0, scratch, 0, copied);
        // Accept the copy only if the same cache instance is still current
        // after copying; otherwise try again.
        if (observed == getActiveTransactionCache()) {
            return Arrays.copyOf(scratch, copied);
        }
    }
    // Every attempt saw the cache instance change during the copy.
    return null;
}
/**
 * Build a multi-line description with one line per non-empty bucket, each
 * line carrying the bucket's index in the hash table followed by the
 * bucket's own string form.
 */
@Override
public String toString() {
    final StringBuilder text = new StringBuilder();
    int slot = 0;
    for (final TransactionIndexBucket candidate : _hashTable) {
        // Empty buckets contribute nothing to the output.
        if (!candidate.isEmpty()) {
            text.append(String.format("%5d: %s\n", slot, candidate));
        }
        slot++;
    }
    return text.toString();
}
/**
 * Format a value for display, substituting "MAX" for
 * <code>Long.MAX_VALUE</code> and "MIN" for <code>Long.MIN_VALUE</code>.
 * Any other value is formatted as a decimal number with grouping
 * separators.
 *
 * @param floor
 *            value to format
 * @return the display string
 */
static String minMaxString(final long floor) {
    if (floor == Long.MAX_VALUE) {
        return "MAX";
    }
    if (floor == Long.MIN_VALUE) {
        return "MIN";
    }
    return String.format("%,d", floor);
}
/**
 * Create the task that polls the active transaction cache and start it.
 *
 * @param persistit
 *            the Persistit instance handed to the new polling task
 */
void start(final Persistit persistit) {
// Store the new task in the field first; close() and crash() read this
// field to find the task later.
_activeTransactionCachePollTask = new ActiveTransactionCachePollTask(persistit);
// Start the task under the name POLLING_TASK_NAME with an interval of
// POLLING_TASK_INTERVAL milliseconds.
_activeTransactionCachePollTask.start(POLLING_TASK_NAME, POLLING_TASK_INTERVAL);
}
/**
 * Close the polling task, if there is one, and clear the field that
 * references it.
 *
 * @return the task that was closed, or <code>null</code> if no task was
 *         present
 */
ActiveTransactionCachePollTask close() {
    final ActiveTransactionCachePollTask pollTask = _activeTransactionCachePollTask;
    if (pollTask == null) {
        // Nothing to close.
        return null;
    }
    pollTask.close();
    _activeTransactionCachePollTask = null;
    return pollTask;
}
/**
 * Invoke <code>crash()</code> on the polling task, if there is one, and
 * clear the field that references it.
 */
void crash() {
    final ActiveTransactionCachePollTask pollTask = _activeTransactionCachePollTask;
    if (pollTask == null) {
        // No task present.
        return;
    }
    pollTask.crash();
    _activeTransactionCachePollTask = null;
}
/**
 * Increment the accumulator snapshot retry counter.
 *
 * @return the counter value after the increment
 */
long incrementAccumulatorSnapshotRetryCounter() {
    final long updated = _accumulatorSnapshotRetryCounter.incrementAndGet();
    return updated;
}
/**
 * Increment the accumulator checkpoint retry counter.
 *
 * @return the counter value after the increment
 */
long incrementAccumulatorCheckpointRetryCounter() {
    final long updated = _accumulatorCheckpointRetryCounter.incrementAndGet();
    return updated;
}
}