/**
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.aurora.scheduler.storage.log;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Logger;
import javax.annotation.Nullable;
import javax.inject.Inject;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hasher;
import com.google.common.primitives.Bytes;
import com.google.inject.assistedinject.Assisted;
import com.twitter.common.base.Closure;
import com.twitter.common.stats.Stats;
import org.apache.aurora.gen.ScheduledTask;
import org.apache.aurora.gen.storage.Frame;
import org.apache.aurora.gen.storage.FrameHeader;
import org.apache.aurora.gen.storage.LogEntry;
import org.apache.aurora.gen.storage.Op;
import org.apache.aurora.gen.storage.RemoveTasks;
import org.apache.aurora.gen.storage.SaveHostAttributes;
import org.apache.aurora.gen.storage.SaveTasks;
import org.apache.aurora.gen.storage.Snapshot;
import org.apache.aurora.gen.storage.Transaction;
import org.apache.aurora.gen.storage.storageConstants;
import org.apache.aurora.scheduler.log.Log;
import org.apache.aurora.scheduler.log.Log.Stream;
import static java.util.Objects.requireNonNull;
import static com.twitter.common.inject.TimedInterceptor.Timed;
import static org.apache.aurora.codec.ThriftBinaryCodec.CodingException;
import static org.apache.aurora.scheduler.log.Log.Stream.InvalidPositionException;
import static org.apache.aurora.scheduler.log.Log.Stream.StreamAccessException;
import static org.apache.aurora.scheduler.storage.log.LogManager.DeduplicateSnapshots;
import static org.apache.aurora.scheduler.storage.log.LogManager.DeflateSnapshots;
import static org.apache.aurora.scheduler.storage.log.LogManager.LogEntryHashFunction;
/**
 * A {@link StreamManager} bound to a single log {@link Stream}.  Implements the physical log
 * entry format: entries too large for a single log record are split into checksummed
 * {@link Frame}s on write and reassembled on read, snapshots may be deduplicated and/or
 * deflated, and multi-record appends are serialized so all of their records land contiguously
 * in the stream.
 */
class StreamManagerImpl implements StreamManager {
  private static final Logger LOG = Logger.getLogger(StreamManagerImpl.class.getName());

  /** Exported counters tracking read/write volume and data-integrity events. */
  private static class Vars {
    // Transactions appended since the last snapshot; reset to zero when a snapshot is taken.
    private final AtomicInteger unSnapshottedTransactions =
        Stats.exportInt("scheduler_log_un_snapshotted_transactions");
    private final AtomicLong bytesWritten = Stats.exportLong("scheduler_log_bytes_written");
    private final AtomicLong entriesWritten = Stats.exportLong("scheduler_log_entries_written");
    private final AtomicLong badFramesRead = Stats.exportLong("scheduler_log_bad_frames_read");
    private final AtomicLong bytesRead = Stats.exportLong("scheduler_log_bytes_read");
    private final AtomicLong entriesRead = Stats.exportLong("scheduler_log_entries_read");
    private final AtomicLong deflatedEntriesRead =
        Stats.exportLong("scheduler_log_deflated_entries_read");
    private final AtomicLong snapshots = Stats.exportLong("scheduler_log_snapshots");
  }

  private final Vars vars = new Vars();

  // Serializes appends so all frames of one logical entry are written as an uninterrupted unit.
  private final Object writeMutex = new Object();

  private final Log.Stream stream;
  private final EntrySerializer entrySerializer;
  private final boolean deflateSnapshots;
  private final HashFunction hashFunction;
  private final SnapshotDeduplicator snapshotDeduplicator;
  private final boolean deduplicateSnapshots;

  @Inject
  StreamManagerImpl(
      @Assisted Stream stream,
      EntrySerializer entrySerializer,
      @DeflateSnapshots boolean deflateSnapshots,
      @LogEntryHashFunction HashFunction hashFunction,
      SnapshotDeduplicator snapshotDeduplicator,
      @DeduplicateSnapshots boolean deduplicateSnapshots) {

    this.stream = requireNonNull(stream);
    this.entrySerializer = requireNonNull(entrySerializer);
    this.deflateSnapshots = deflateSnapshots;
    this.hashFunction = requireNonNull(hashFunction);
    this.snapshotDeduplicator = requireNonNull(snapshotDeduplicator);
    this.deduplicateSnapshots = deduplicateSnapshots;
  }

  /**
   * Replays the stream from its first entry, invoking {@code reader} once per logical entry.
   * Framed entries are reassembled (aborted or corrupt frame sequences are skipped and
   * counted), deflated entries are inflated, and deduplicated snapshots are reduplicated
   * before being handed to the reader.
   */
  @Override
  public void readFromBeginning(Closure<LogEntry> reader)
      throws CodingException, InvalidPositionException, StreamAccessException {

    Iterator<Log.Entry> entries = stream.readAll();

    while (entries.hasNext()) {
      LogEntry logEntry = decodeLogEntry(entries.next());

      // A frame entry marks the start (or the remains) of a framed sequence; keep trying to
      // reassemble until we land on a complete logical entry or run off the aborted sequence.
      while (logEntry != null && isFrame(logEntry)) {
        logEntry = tryDecodeFrame(logEntry.getFrame(), entries);
      }

      if (logEntry != null) {
        if (logEntry.isSetDeflatedEntry()) {
          logEntry = Entries.inflate(logEntry);
          vars.deflatedEntriesRead.incrementAndGet();
        }

        if (logEntry.isSetDeduplicatedSnapshot()) {
          logEntry = LogEntry.snapshot(
              snapshotDeduplicator.reduplicate(logEntry.getDeduplicatedSnapshot()));
        }

        reader.execute(logEntry);
        vars.entriesRead.incrementAndGet();
      }
    }
  }

  /**
   * Attempts to reassemble a framed log entry.  {@code frame} is expected to be a
   * {@link FrameHeader}; the subsequent chunk frames are consumed from {@code entries}.
   *
   * @return the reassembled entry; {@code null} if the sequence was aborted (no header, or the
   *         stream ended early); or the first non-chunk entry encountered, so the caller can
   *         resume processing with it.
   * @throws CodingException if a fully-read frame sequence fails its checksum.
   */
  @Nullable
  private LogEntry tryDecodeFrame(Frame frame, Iterator<Log.Entry> entries)
      throws CodingException {

    if (!isHeader(frame)) {
      LOG.warning("Found a frame with no preceding header, skipping.");
      return null;
    }
    FrameHeader header = frame.getHeader();
    byte[][] chunks = new byte[header.getChunkCount()][];
    Hasher hasher = hashFunction.newHasher();
    for (int i = 0; i < header.getChunkCount(); i++) {
      if (!entries.hasNext()) {
        logBadFrame(header, i);
        return null;
      }
      LogEntry logEntry = decodeLogEntry(entries.next());
      if (!isFrame(logEntry)) {
        logBadFrame(header, i);
        return logEntry;
      }
      Frame chunkFrame = logEntry.getFrame();
      if (!isChunk(chunkFrame)) {
        logBadFrame(header, i);
        return logEntry;
      }
      byte[] chunkData = chunkFrame.getChunk().getData();
      hasher.putBytes(chunkData);
      chunks[i] = chunkData;
    }
    // All chunks present - verify integrity before decoding the concatenated payload.
    if (!Arrays.equals(header.getChecksum(), hasher.hash().asBytes())) {
      throw new CodingException("Read back a framed log entry that failed its checksum");
    }
    return Entries.thriftBinaryDecode(Bytes.concat(chunks));
  }

  private static boolean isFrame(LogEntry logEntry) {
    return logEntry.getSetField() == LogEntry._Fields.FRAME;
  }

  private static boolean isChunk(Frame frame) {
    return frame.getSetField() == Frame._Fields.CHUNK;
  }

  private static boolean isHeader(Frame frame) {
    return frame.getSetField() == Frame._Fields.HEADER;
  }

  /** Records (and counts) a frame sequence that ended before all expected chunks were read. */
  private void logBadFrame(FrameHeader header, int chunkIndex) {
    LOG.info(String.format("Found an aborted transaction, required %d frames and found %d",
        header.getChunkCount(), chunkIndex));
    vars.badFramesRead.incrementAndGet();
  }

  private LogEntry decodeLogEntry(Log.Entry entry) throws CodingException {
    byte[] contents = entry.contents();
    vars.bytesRead.addAndGet(contents.length);
    return Entries.thriftBinaryDecode(contents);
  }

  @Override
  public void truncateBefore(Log.Position position) {
    stream.truncateBefore(position);
  }

  @Override
  public StreamTransactionImpl startTransaction() {
    return new StreamTransactionImpl();
  }

  /**
   * Writes {@code snapshot} to the stream (optionally deduplicated and/or deflated per this
   * manager's configuration) and truncates every entry that precedes it, since the snapshot
   * subsumes them.
   */
  @Override
  @Timed("log_manager_snapshot")
  public void snapshot(Snapshot snapshot)
      throws CodingException, InvalidPositionException, StreamAccessException {

    LogEntry entry;
    if (deduplicateSnapshots) {
      entry = LogEntry.deduplicatedSnapshot(snapshotDeduplicator.deduplicate(snapshot));
    } else {
      entry = LogEntry.snapshot(snapshot);
    }

    if (deflateSnapshots) {
      entry = deflate(entry);
    }

    Log.Position position = appendAndGetPosition(entry);
    vars.snapshots.incrementAndGet();
    vars.unSnapshottedTransactions.set(0);
    stream.truncateBefore(position);
  }

  // Not meant to be subclassed, but timed methods must be non-private.
  // See https://github.com/google/guice/wiki/AOP#limitations
  @Timed("log_manager_deflate")
  protected LogEntry deflate(LogEntry entry) throws CodingException {
    return Entries.deflate(entry);
  }

  // Not meant to be subclassed, but timed methods must be non-private.
  // See https://github.com/google/guice/wiki/AOP#limitations
  @Timed("log_manager_append")
  protected Log.Position appendAndGetPosition(LogEntry logEntry) throws CodingException {
    Log.Position firstPosition = null;
    byte[][] entries = entrySerializer.serialize(logEntry);
    synchronized (writeMutex) { // ensure all sub-entries are written as a unit
      for (byte[] entry : entries) {
        Log.Position position = stream.append(entry);
        if (firstPosition == null) {
          firstPosition = position;
        }
        vars.bytesWritten.addAndGet(entry.length);
      }
    }
    vars.entriesWritten.incrementAndGet();
    // Returns the position of the first record, which identifies the whole logical entry.
    return firstPosition;
  }

  /**
   * A transaction that accumulates {@link Op}s, coalescing compatible consecutive ops to
   * compact the binary representation, and appends them to the stream as a single
   * {@link Transaction} entry on {@link #commit()}.
   */
  final class StreamTransactionImpl implements StreamTransaction {
    private final Transaction transaction =
        new Transaction().setSchemaVersion(storageConstants.CURRENT_SCHEMA_VERSION);
    private final AtomicBoolean committed = new AtomicBoolean(false);

    StreamTransactionImpl() {
      // supplied by factory method
    }

    /**
     * Appends the accumulated ops to the stream.  May be called at most once.
     *
     * @return the position of the written entry, or {@code null} if no ops were added.
     */
    @Override
    public Log.Position commit() throws CodingException {
      Preconditions.checkState(!committed.getAndSet(true),
          "Can only call commit once per transaction.");

      if (!transaction.isSetOps()) {
        return null;
      }

      Log.Position position = appendAndGetPosition(LogEntry.transaction(transaction));
      vars.unSnapshottedTransactions.incrementAndGet();
      return position;
    }

    @Override
    public void add(Op op) {
      Preconditions.checkState(!committed.get());

      // Try to fold the new op into the most recently added one before appending it.
      Op prior = transaction.isSetOps() ? Iterables.getLast(transaction.getOps(), null) : null;
      if (prior == null || !coalesce(prior, op)) {
        transaction.addToOps(op);
      }
    }

    /**
     * Tries to coalesce a new op into the prior to compact the binary representation and
     * increase batching.
     *
     * <p>Its recommended that as new {@code Op}s are added, they be treated here although they
     * need not be</p>
     *
     * @param prior The previous op.
     * @param next The next op to be added.
     * @return {@code true} if the next op was coalesced into the prior, {@code false} otherwise.
     */
    private boolean coalesce(Op prior, Op next) {
      // An op with no field set can never be coalesced.  Using || here (rather than &&) also
      // guards against an NPE below: if only `prior` were unset, getSetField() would return
      // null and the equals() call would throw.
      if (!prior.isSet() || !next.isSet()) {
        return false;
      }

      Op._Fields priorType = prior.getSetField();
      if (!priorType.equals(next.getSetField())) {
        return false;
      }

      switch (priorType) {
        case SAVE_FRAMEWORK_ID:
          // Last-write-wins: only the most recent framework id matters.
          prior.setSaveFrameworkId(next.getSaveFrameworkId());
          return true;

        case SAVE_ACCEPTED_JOB:
        case REMOVE_JOB:
        case SAVE_QUOTA:
        case REMOVE_QUOTA:
          return false;

        case SAVE_TASKS:
          coalesce(prior.getSaveTasks(), next.getSaveTasks());
          return true;
        case REMOVE_TASKS:
          coalesce(prior.getRemoveTasks(), next.getRemoveTasks());
          return true;
        case SAVE_HOST_ATTRIBUTES:
          return coalesce(prior.getSaveHostAttributes(), next.getSaveHostAttributes());
        default:
          LOG.warning("Unoptimized op: " + priorType);
          return false;
      }
    }

    private void coalesce(SaveTasks prior, SaveTasks next) {
      if (next.isSetTasks()) {
        if (prior.isSetTasks()) {
          // It is an expected invariant that an operation may reference a task (identified by
          // task ID) no more than one time. Therefore, to coalesce two SaveTasks operations,
          // the most recent task definition overrides the prior operation.
          Map<String, ScheduledTask> coalesced = Maps.newHashMap();
          for (ScheduledTask task : prior.getTasks()) {
            coalesced.put(task.getAssignedTask().getTaskId(), task);
          }
          for (ScheduledTask task : next.getTasks()) {
            coalesced.put(task.getAssignedTask().getTaskId(), task);
          }
          prior.setTasks(ImmutableSet.copyOf(coalesced.values()));
        } else {
          prior.setTasks(next.getTasks());
        }
      }
    }

    private void coalesce(RemoveTasks prior, RemoveTasks next) {
      if (next.isSetTaskIds()) {
        if (prior.isSetTaskIds()) {
          // Removals are cumulative: union the task id sets.
          prior.setTaskIds(ImmutableSet.<String>builder()
              .addAll(prior.getTaskIds())
              .addAll(next.getTaskIds())
              .build());
        } else {
          prior.setTaskIds(next.getTaskIds());
        }
      }
    }

    private boolean coalesce(SaveHostAttributes prior, SaveHostAttributes next) {
      // Only attributes for the same host can be merged; the newer attributes win.
      if (prior.getHostAttributes().getHost().equals(next.getHostAttributes().getHost())) {
        prior.getHostAttributes().setAttributes(next.getHostAttributes().getAttributes());
        return true;
      }
      return false;
    }
  }
}