/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.store.kahadb.disk.journal;
import java.io.IOException;
import java.util.zip.Adler32;
import java.util.zip.Checksum;
import org.apache.activemq.store.kahadb.disk.util.DataByteArrayOutputStream;
import org.apache.activemq.util.ByteSequence;
import org.apache.activemq.util.RecoverableRandomAccessFile;
/**
 * An optimized writer that batches appends to a data file. This object is
 * thread safe, and its throughput increases with the number of concurrent
 * writes performed against it.
 * The thread calling enqueue opens the file and buffers the data, which
 * shortens the round trip made by the write thread.
 *
 */
class CallerBufferingDataFileAppender extends DataFileAppender {

    // Two reusable buffers, alternated per batch via 'flip', so the caller
    // thread can fill the next batch while the writer thread drains the
    // previous one. Sized to the superclass' maxWriteBatchSize.
    final DataByteArrayOutputStream cachedBuffers[] = new DataByteArrayOutputStream[] {
        new DataByteArrayOutputStream(maxWriteBatchSize),
        new DataByteArrayOutputStream(maxWriteBatchSize)
    };

    // Toggles between 0 and 1 to select which cached buffer the next batch uses.
    volatile byte flip = 0x1;

    /**
     * A write batch that buffers record data in the caller's thread. Each new
     * batch takes the alternate cached buffer and seeds it with a placeholder
     * batch control record that processQueue() patches just before the disk
     * write.
     */
    public class WriteBatch extends DataFileAppender.WriteBatch {
        DataByteArrayOutputStream buff = cachedBuffers[flip ^= 1];
        // Becomes true once any appended write demands a disk sync
        // (write.sync, or an onComplete callback while syncOnComplete is set).
        private boolean forceToDisk;

        public WriteBatch(DataFile dataFile, int offset, Journal.WriteCommand write) throws IOException {
            super(dataFile, offset);
            initBuffer(buff);
            append(write);
        }

        @Override
        public void append(Journal.WriteCommand write) throws IOException {
            super.append(write);
            // Buffer the record immediately, in the calling thread.
            forceToDisk |= appendToBuffer(write, buff);
        }
    }

    @Override
    protected DataFileAppender.WriteBatch newWriteBatch(Journal.WriteCommand write, DataFile file) throws IOException {
        return new WriteBatch(file, file.getLength(), write);
    }

    /**
     * Resets the buffer and writes a placeholder batch control record
     * (header + zero length + zero checksum). The real length and checksum
     * are patched in by processQueue() once the batch is complete.
     */
    private void initBuffer(DataByteArrayOutputStream buff) throws IOException {
        // Write an empty batch control record.
        buff.reset();
        buff.write(Journal.BATCH_CONTROL_RECORD_HEADER);
        buff.writeInt(0);
        buff.writeLong(0);
    }

    public CallerBufferingDataFileAppender(Journal dataManager) {
        super(dataManager);
    }

    /**
     * The async processing loop that writes to the data files and does the
     * force calls. Since the file sync() call is the slowest of all the
     * operations, this algorithm tries to 'batch' or group together several
     * file sync() requests into a single file sync() call. The batching is
     * accomplished attaching the same CountDownLatch instance to every force
     * request in a group.
     */
    @Override
    protected void processQueue() {
        DataFile dataFile = null;
        RecoverableRandomAccessFile file = null;
        WriteBatch wb = null;
        try {
            while (true) {
                Object o = null;

                // Block till we get a command.
                synchronized (enqueueMutex) {
                    while (true) {
                        if (nextWriteBatch != null) {
                            o = nextWriteBatch;
                            nextWriteBatch = null;
                            break;
                        }
                        if (shutdown) {
                            return;
                        }
                        enqueueMutex.wait();
                    }
                    // Wake any threads waiting to hand off the next batch.
                    enqueueMutex.notifyAll();
                }

                wb = (WriteBatch)o;
                if (dataFile != wb.dataFile) {
                    // Rolling to a new journal file: trim the previous file
                    // back to its logical length before closing it.
                    if (file != null) {
                        file.setLength(dataFile.getLength());
                        dataFile.closeRandomAccessFile(file);
                    }
                    dataFile = wb.dataFile;
                    file = dataFile.openRandomAccessFile();
                    // Pre-extend the file so appends don't grow it piecemeal.
                    if( file.length() < journal.preferedFileLength ) {
                        file.setLength(journal.preferedFileLength);
                    }
                }

                final DataByteArrayOutputStream buff = wb.buff;
                final boolean forceToDisk = wb.forceToDisk;

                ByteSequence sequence = buff.toByteSequence();

                // Now we can fill in the batch control record properly.
                // reset()+skip() rewinds to the length/checksum fields of the
                // placeholder record without disturbing the buffered record
                // data that follows them in the underlying array.
                buff.reset();
                buff.skip(5+Journal.BATCH_CONTROL_RECORD_MAGIC.length);
                buff.writeInt(sequence.getLength()-Journal.BATCH_CONTROL_RECORD_SIZE);
                if( journal.isChecksum() ) {
                    Checksum checksum = new Adler32();
                    checksum.update(sequence.getData(), sequence.getOffset()+Journal.BATCH_CONTROL_RECORD_SIZE, sequence.getLength()-Journal.BATCH_CONTROL_RECORD_SIZE);
                    buff.writeLong(checksum.getValue());
                }

                // Now do the 1 big write.
                file.seek(wb.offset);
                if (maxStat > 0) {
                    if (statIdx < maxStat) {
                        stats[statIdx++] = sequence.getLength();
                    } else {
                        long all = 0;
                        for (;statIdx > 0;) {
                            all+= stats[--statIdx];
                        }
                        System.err.println("Ave writeSize: " + all/maxStat);
                    }
                }
                file.write(sequence.getData(), sequence.getOffset(), sequence.getLength());

                ReplicationTarget replicationTarget = journal.getReplicationTarget();
                if( replicationTarget!=null ) {
                    replicationTarget.replicate(wb.writes.getHead().location, sequence, forceToDisk);
                }

                if (forceToDisk) {
                    file.sync();
                }

                Journal.WriteCommand lastWrite = wb.writes.getTail();
                journal.setLastAppendLocation(lastWrite.location);

                signalDone(wb);
            }
        } catch (IOException e) {
            // Propagate the failure to the batch in flight and any queued batch
            // so their waiters are released with the exception set.
            synchronized (enqueueMutex) {
                firstAsyncException = e;
                if (wb != null) {
                    wb.exception.set(e);
                    wb.latch.countDown();
                }
                if (nextWriteBatch != null) {
                    nextWriteBatch.exception.set(e);
                    nextWriteBatch.latch.countDown();
                }
            }
        } catch (InterruptedException e) {
            // FIX: restore the interrupt status rather than swallowing it, so
            // the interruption remains observable; shutdown then proceeds
            // through the finally block below.
            Thread.currentThread().interrupt();
        } finally {
            try {
                if (file != null) {
                    dataFile.closeRandomAccessFile(file);
                }
            } catch (Throwable ignore) {
                // Best-effort close on the way out; nothing useful to do here.
            }
            shutdownDone.countDown();
            running = false;
        }
    }

    /**
     * Appends one record (size, type, payload) to the batch buffer.
     *
     * @return true if this write requires the batch to be forced to disk
     *         (explicit sync, or an onComplete callback with syncOnComplete).
     */
    private boolean appendToBuffer(Journal.WriteCommand write, DataByteArrayOutputStream buff) throws IOException {
        buff.writeInt(write.location.getSize());
        buff.writeByte(write.location.getType());
        buff.write(write.data.getData(), write.data.getOffset(), write.data.getLength());
        return write.sync | (syncOnComplete && write.onComplete != null);
    }
}