// Package com.afewmoreamps
//
// Source Code of com.afewmoreamps.JitCaskImpl

//Copyright 2012 Ariel Weisberg
//
//Licensed under the Apache License, Version 2.0 (the "License");
//you may not use this file except in compliance with the License.
//You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
//Unless required by applicable law or agreed to in writing, software
//distributed under the License is distributed on an "AS IS" BASIS,
//WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//See the License for the specific language governing permissions and
//limitations under the License.
package com.afewmoreamps;

import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.Semaphore;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

import com.afewmoreamps.KeyDir.SubKeyDir;
import com.afewmoreamps.util.COWSortedMap;
import com.afewmoreamps.util.SettableFuture;
import com.google.common.util.concurrent.*;

class JitCaskImpl implements JitCask, Iterable<CaskEntry> {

    /**
     * If this flag is set then put will not return until
     * the data is fsynced
     */
    public static final int PUTFLAG_SYNC = 1 << 1;

    private final File m_caskPath;

    private final ListeningExecutorService m_writeThread;
    private final ListeningExecutorService m_compressionThreads;
    private final ListeningExecutorService m_readThreads;
    private final ListeningScheduledExecutorService m_syncThread;

    private ScheduledFuture<?> m_syncTaskRunner;
    private ListenableFutureTask<Long> m_nextSyncTask;

    private final KeyDir m_keyDir = new KeyDir(READ_QUEUE_DEPTH);

    private final COWSortedMap<Integer, MiniCask> m_miniCasks = new COWSortedMap<Integer, MiniCask>();

    private MiniCask m_outCask;

    private int m_nextMiniCaskIndex = 0;

    private final boolean m_syncByDefault;
    private final int m_syncInterval;
    private final int m_maxValidValueSize;

    private static final int READ_QUEUE_DEPTH = 64;
    private final Semaphore m_maxOutstandingReads = new Semaphore(READ_QUEUE_DEPTH);
    private final Semaphore m_maxOutstandingWrites = new Semaphore(64);
    private final ConcurrentLinkedQueue<MiniCask> m_finishedCasksPendingSync =
        new ConcurrentLinkedQueue<MiniCask>();

    public JitCaskImpl(CaskConfig config) throws IOException {
        m_syncByDefault = config.syncByDefault;
        m_syncInterval = config.syncInterval;
        m_caskPath = config.caskPath;
        m_maxValidValueSize = config.maxValidValueSize;

        if (!m_caskPath.exists()) {
            throw new IOException("Path " + m_caskPath + " does not exist");
        }
        if (!m_caskPath.isDirectory()) {
            throw new IOException("Path " + m_caskPath + " exists but is not a directory");
        }
        if (!m_caskPath.canRead()) {
            throw new IOException("Path " + m_caskPath + " is not readable");
        }
        if (!m_caskPath.canWrite()) {
            throw new IOException("Path " + m_caskPath + " is not writable");
        }
        if (!m_caskPath.canExecute()) {
            throw new IOException("Path " + m_caskPath + " is not executable");
        }

        ThreadFactory tf = new ThreadFactory() {

            @Override
            public Thread newThread(Runnable r) {
                Thread t = new Thread( r, "JitCask[" + m_caskPath + "] Write Thread");
                t.setDaemon(true);
                return t;
            }

        };
        m_writeThread = MoreExecutors.listeningDecorator(
                new ThreadPoolExecutor(
                        1,
                        1,
                        0,
                        TimeUnit.MILLISECONDS,
                        new LinkedBlockingQueue<Runnable>(),
                        tf));

        tf = new ThreadFactory() {

            @Override
            public Thread newThread(Runnable r) {
                Thread t = new Thread( r, "JitCask[" + m_caskPath + "] Sync Thread");
                t.setDaemon(true);
                return t;
            }

        };
        m_syncThread = MoreExecutors.listeningDecorator(
                new ScheduledThreadPoolExecutor(
                1,
                tf));

        tf = new ThreadFactory() {
            private final AtomicInteger m_counter = new AtomicInteger();

            @Override
            public Thread newThread(Runnable r) {
                Thread t = new Thread(
                        null,
                        r,
                        "JitCask[" + m_caskPath + "] Read Thread " + m_counter.incrementAndGet(),
                        1024 * 256);
                t.setDaemon(true);
                return t;
            }

        };
        m_readThreads = MoreExecutors.listeningDecorator(
                new ThreadPoolExecutor(
                        READ_QUEUE_DEPTH,
                        READ_QUEUE_DEPTH,
                        0,
                        TimeUnit.MILLISECONDS,
                        new LinkedBlockingQueue<Runnable>(),
                        tf,
                        new ThreadPoolExecutor.AbortPolicy()));

        tf = new ThreadFactory() {
            private final AtomicInteger m_counter = new AtomicInteger();

            @Override
            public Thread newThread(Runnable r) {
                Thread t = new Thread(
                        null,
                        r,
                        "JitCask[" + m_caskPath + "] Compression Thread " + m_counter.incrementAndGet(),
                        1024 * 256);
                t.setDaemon(true);
                return t;
            }

        };

        final int availableProcs = Runtime.getRuntime().availableProcessors() / 2;
        m_compressionThreads =
                MoreExecutors.listeningDecorator(
                        new ThreadPoolExecutor(
                                availableProcs,
                                availableProcs,
                                0,
                                TimeUnit.MILLISECONDS,
                                new LinkedBlockingQueue<Runnable>(availableProcs),
                                tf,
                                new ThreadPoolExecutor.CallerRunsPolicy()));
    }

    @Override
    public synchronized void open() throws IOException {
        if (m_readThreads.isShutdown()) {
            throw new IOException("Can't reuse a closed JitCask");
        }
        reloadJitCask();
        m_outCask = new MiniCask(
                m_caskPath,
                m_nextMiniCaskIndex,
                null,
                m_maxValidValueSize);
        m_miniCasks.put(m_nextMiniCaskIndex, m_outCask);
        m_nextMiniCaskIndex++;

        m_syncTaskRunner = m_syncThread.scheduleAtFixedRate(new Runnable() {
            @Override
            public void run() {
                try {
                    ListenableFutureTask<Long> currentSyncTask = m_nextSyncTask;
                    m_nextSyncTask = ListenableFutureTask.create(new Callable<Long>() {
                        @Override
                        public Long call() {
                            /*
                             * Catch throwable since we don't ever want to stop syncing.
                             */
                            try {
                                final long  start = System.currentTimeMillis();
                                sync();
                                final long end = System.currentTimeMillis();
                                final long delta = System.currentTimeMillis() - start;
                                if (delta > m_syncInterval) {
                                    System.err.println("Missed sync interval by " + delta);
                                }
                                return end;
                            } catch (Throwable t) {
                                t.printStackTrace();
                                return Long.MIN_VALUE;
                            }
                        }
                    });
                    if (currentSyncTask != null) {
                        currentSyncTask.run();
                    }
                } catch (Throwable t) {
                    t.printStackTrace();
                }
            }
        }, 0, m_syncInterval, TimeUnit.MILLISECONDS);

    }

    private void sync() throws IOException {
        MiniCask cask;
        while ((cask = m_finishedCasksPendingSync.poll()) != null) {
            cask.sync();
        }
        m_outCask.sync();
    }

    private void reloadJitCask() throws IOException {
        int highestIndex = -1;
        for (File f : m_caskPath.listFiles()) {
            if (f.getName().endsWith(".hintcask")) continue;
            if (!f.getName().endsWith(".minicask")) {
                throw new IOException("Unrecognized file " + f + " found in cask directory");
            }
            String fields[] = f.getName().substring(0, f.getName().length() - 9).split("-");
            int caskIndex = Integer.valueOf(fields[0]);
            long timestamp = Integer.valueOf(fields[1]);
            highestIndex = Math.max(caskIndex, highestIndex);
            MiniCask cask =
                new MiniCask(
                        f.getParentFile(),
                        caskIndex,
                        timestamp,
                        m_maxValidValueSize);
            m_miniCasks.put(caskIndex, cask);
        }
        m_nextMiniCaskIndex = highestIndex + 1;

        for (MiniCask miniCask : m_miniCasks.values()) {
            final Iterator<CaskEntry> iter = miniCask.getReloadIterator();
            while (iter.hasNext()) {
                final CaskEntry ce = iter.next();
                final byte keyHashBytes[] = new byte[KDEntry.SIZE];
                System.arraycopy(ce.keyHash, 0, keyHashBytes, 0, 20);
                final SubKeyDir subKeyDir = m_keyDir.getSubKeyDir(keyHashBytes);

                if (ce.valuePosition == -1) {
                    subKeyDir.m_keys.remove(keyHashBytes);
                    continue;
                }

                KDEntry.toBytes(
                        keyHashBytes,
                        ce.miniCask.m_fileId,
                        ce.valuePosition);
                subKeyDir.m_keys.put(
                        keyHashBytes,
                        keyHashBytes);
            }
        }

        /*
         * Remove all the tombstones from memory because the merge thread might have moved stuff out of order?
         * Really want to see if I can make merging not change the order of stuff
         */
        for (SubKeyDir subKeyDir : m_keyDir.m_subKeyDirs.values()) {
            Iterator<Map.Entry<byte[], byte[]>> iter = subKeyDir.m_keys.entrySet().iterator();
            while (iter.hasNext()) {
                Map.Entry<byte[], byte[]> entry = iter.next();
                KDEntry kdEntry = new KDEntry(entry.getValue());
                if (kdEntry.valuePos == -1) {
                    iter.remove();
                }
            }
        }
    }

    @Override
    public ListenableFuture<GetResult> get(final byte[] key) {
        final long start = System.currentTimeMillis();

        try {
            m_maxOutstandingReads.acquire();
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
        return m_readThreads.submit(new Callable<GetResult>() {
            @Override
            public GetResult call() throws Exception {
                try {
                    while (true) {
                        MessageDigest md = MessageDigest.getInstance("SHA-1");
                        byte keyHash[] = md.digest(key);
                        final byte decoratedKey[] = KeyDir.decorateKeyHash(keyHash);
                        KDEntry entry = m_keyDir.get(decoratedKey);
                        if (entry == null) {
                            return null;
                        }

                        final MiniCask mc = m_miniCasks.get(entry.fileId);
                        /*
                         * Race condition, file was deleted after looking up entry.
                         * Loop again and find out the state of the key from the KeyDir
                         * again.
                         */
                        if (mc == null) {
                            continue;
                        }

                        /*
                         * Potentially compressed and uncompressed value, or just the uncompressed
                         * value wrapped in a ListenableFutureTask
                         */
                        ByteBuffer value = mc.getValue(entry.valuePos);
                        return new GetResult(
                                key,
                                value,
                                (int)(System.currentTimeMillis() - start));
                    }
                } finally {
                    m_maxOutstandingReads.release();
                }
            }
        });
    }

    @Override
    public ListenableFuture<PutResult> put(byte[] key, byte[] value) {
        return put(
                key,
                value,
                m_syncByDefault);
    }

    @Override
    public ListenableFuture<PutResult> put(final byte[] key, final byte[] value, final boolean waitForSync) {
        if (key == null || value == null) {
            throw new IllegalArgumentException();
        }

        final int uncompressedSize =
                key.length + value.length + MiniCask.HEADER_SIZE + 8;//8 is the length prefixes in the compressed entry for the key and value
        /*
         * Record when the put started
         */
        final long start = System.currentTimeMillis();

        /*
         * This is the return value that will be set with the result
         * or any exceptions thrown during the put
         */
        final SettableFuture<PutResult> retval = SettableFuture.create();

        /*
         * If compression is requested, attempt to compress the value
         * and generate the CRC in a separate thread pool before submitting
         * to the single write thread. This allows parallelism for what is potentially
         * the more CPU intensive part of a write. Can't have more write
         * threads so best to scale out as far as possible before giving it work.
         */
        final ListenableFuture<Object[]> assembledEntryFuture =
            m_compressionThreads.submit(new Callable<Object[]>() {
                @Override
                public Object[] call() throws Exception {
                    return MiniCask.constructEntry(key, value);
                }
        });

        /*
         * Limit the maximum number of outstanding writes
         * to avoid OOM in naive benchmarks/applications
         */
        try {
            m_maxOutstandingWrites.acquire();
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
        retval.addListener(new Runnable() {
            @Override
            public void run() {
                m_maxOutstandingWrites.release();
            }
        },
        MoreExecutors.sameThreadExecutor());

        /*
         * Submit the write to the single write thread
         * which will write the kv pair to the current file and
         * upsert it into the keydir. If sync was not requested
         * then the retval future will be set as soon as the write
         * thread writes the new kv pair to the memory mapped file (page cache).
         * Otherwise it adds a listener to the next sync task that will set the value
         * once sync has been performed.
         */
        m_writeThread.execute(new Runnable() {
            @Override
            public void run() {
                /*
                 * Retrieve the compression results, forwarding any exceptions
                 * to the retval future.
                 */
                Object assembledEntry[];
                try {
                    assembledEntry = assembledEntryFuture.get();
                } catch (Throwable t) {
                    retval.setException(t);
                    return;
                }

                final byte entryBytes[] = (byte[])assembledEntry[0];
                final byte keyHash[] = (byte[])assembledEntry[1];

                try {
                    putImpl( entryBytes, keyHash, false);
                } catch (Throwable t) {
                    retval.setException(t);
                    return;
                }

                /*
                 * If the put requested waiting for sync then don't set the retval future
                 * immediately. Add a listener for the next sync task that will do it
                 * once the data is really durable.
                 *
                 * Otherwise set it immediately and use the current time to reflect the latency
                 * of the put
                 */
                if (waitForSync) {
                    final ListenableFuture<Long> syncTask = m_nextSyncTask;
                    syncTask.addListener( new Runnable() {
                        @Override
                        public void run() {
                            try {
                                retval.set(
                                        new PutResult(
                                                uncompressedSize,
                                                entryBytes.length,
                                                (int)(syncTask.get() - start)));
                            } catch (Throwable t) {
                                retval.setException(t);
                                return;
                            }
                        }
                    }, MoreExecutors.sameThreadExecutor());
                } else {
                    retval.set(
                        new PutResult(
                            uncompressedSize,
                            entryBytes.length,
                             (int)(System.currentTimeMillis() - start)));
                }
            }
        });

        return retval;
    }

    private void putImpl(
            byte entry[],
            byte keyHash[],
            boolean isTombstone) throws IOException {
        assert(keyHash.length == 20);
        if (!m_outCask.addEntry(entry, keyHash,isTombstone, m_keyDir)) {
            m_outCask = new MiniCask(
                    new File(m_caskPath, m_nextMiniCaskIndex + ".minicask"),
                    m_nextMiniCaskIndex,
                    null,
                    m_maxValidValueSize);
            m_miniCasks.put(m_nextMiniCaskIndex, m_outCask);
            m_nextMiniCaskIndex++;
            if (!m_outCask.addEntry(entry, keyHash, isTombstone, m_keyDir)) {
                throw new IOException("Unable to place value in an empty bitcask, should never happen");
            }
        }
    }

    @Override
    public ListenableFuture<RemoveResult> remove(final byte key[]) {
        return remove(key, m_syncByDefault);
    }

    @Override
    public ListenableFuture<RemoveResult> remove(final byte[] key, final boolean waitForSync) {
        if (key == null) {
            throw new IllegalArgumentException();
        }

        final long start = System.currentTimeMillis();

        try {
            m_maxOutstandingWrites.acquire();
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }

        final SettableFuture<RemoveResult> retval = SettableFuture.create();
        retval.addListener(new Runnable() {
            @Override
            public void run() {
                m_maxOutstandingWrites.release();
            }
        },
        MoreExecutors.sameThreadExecutor());

        MessageDigest md = null;
        try {
            md = MessageDigest.getInstance("SHA-1");
        } catch (NoSuchAlgorithmException e) {
            retval.setException(e);
            return retval;
        }

        final byte keyHash[] = md.digest(key);

        m_writeThread.execute(new Runnable() {
            @Override
            public void run() {
                KDEntry entry = m_keyDir.get(KeyDir.decorateKeyHash(keyHash));
                if (entry == null) {
                    retval.set(new RemoveResult((int)(System.currentTimeMillis() - start)));
                    return;
                }
                final MiniCask mc = m_miniCasks.get(entry.fileId);
                try {
                    putImpl( MiniCask.constructTombstoneEntry(keyHash, entry.fileId, mc.m_timestamp), keyHash, true);
                } catch (Throwable t) {
                    retval.setException(t);
                    return;
                }
                if (waitForSync) {
                    final ListenableFuture<Long> syncTask = m_nextSyncTask;
                    syncTask.addListener(new Runnable() {
                        @Override
                        public void run() {
                            try {
                                retval.set(
                                        new RemoveResult(
                                                (int)(syncTask.get() - start)));
                            } catch (Throwable t) {
                                retval.setException(t);
                                return;
                            }
                        }
                    }, MoreExecutors.sameThreadExecutor());
                } else {
                    retval.set(
                            new RemoveResult(
                                    (int)(System.currentTimeMillis() - start)));
                }
            }
        });
        return retval;
    }

    @Override
    public synchronized void close() throws IOException {
        m_readThreads.shutdown();
        m_writeThread.shutdown();
        try {
            m_readThreads.awaitTermination(365, TimeUnit.DAYS);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
        try {
            m_writeThread.awaitTermination(365, TimeUnit.DAYS);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
        m_syncTaskRunner.cancel(false);

        m_syncThread.shutdown();
        try {
            m_syncThread.awaitTermination(365, TimeUnit.DAYS);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }

        ListenableFutureTask<Long> syncTask = m_nextSyncTask;
        m_nextSyncTask = null;

        /*
         * Very cheesy hack to make sure the reference to m_nextSyncTask it was leaked is done being used.
         * I don't think it can actually be leaked because the write thread is shutdown
         * and it is the only one that is supposed to register listeners
         */
        try {
            Thread.sleep(200);

        } catch (InterruptedException e) {
            e.printStackTrace();
        }

        /*
         * Run the last sync task to make sure any dangling listeners are synced and notified
         */
        syncTask.run();

        /*
         * Now close all the files
         */
        try {
            for (MiniCask mc : m_miniCasks.values()) {
                try {
                    mc.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        } finally {
            m_miniCasks.clear();
            /*
             * Would really love those buffers to get unmapped
             */
            for (int ii = 0; ii < 10; ii++) {
                System.gc();
            }
        }
    }

    @Override
    public Iterator<CaskEntry> iterator() {
        final Map<Integer, MiniCask> casks = m_miniCasks.get();

        if (casks.isEmpty()) {
            return new Iterator<CaskEntry>() {

                @Override
                public boolean hasNext() {
                    return false;
                }

                @Override
                public CaskEntry next() {
                    throw new NoSuchElementException();
                }

                @Override
                public void remove() {
                    throw new UnsupportedOperationException();
                }

            };
        }

        return new Iterator<CaskEntry>() {

            Iterator<MiniCask> caskIterator = casks.values().iterator();
            MiniCask currentCask = caskIterator.next();
            Iterator<CaskEntry> entryIterator = currentCask.iterator();

            @Override
            public boolean hasNext() {
                if (caskIterator == null) {
                    return false;
                }
                while (!entryIterator.hasNext()) {
                    if (caskIterator.hasNext()) {
                        currentCask = caskIterator.next();
                        entryIterator = currentCask.iterator();
                    } else {
                        caskIterator = null;
                        return false;
                    }
                }
                return true;
            }

            @Override
            public CaskEntry next() {
                if (caskIterator == null) {
                    throw new NoSuchElementException();
                }
                assert(entryIterator.hasNext());
                return entryIterator.next();
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException();
            }

        };
    }

}
// TOP
//
// Related Classes of com.afewmoreamps.JitCaskImpl
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.