Source Code of org.voltdb.utils.PBDSegment

/* This file is part of VoltDB.
* Copyright (C) 2008-2014 VoltDB Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with VoltDB.  If not, see <http://www.gnu.org/licenses/>.
*/
package org.voltdb.utils;

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;

import org.voltcore.logging.VoltLogger;
import org.voltcore.utils.Bits;
import org.voltcore.utils.DBBPool;
import org.voltcore.utils.DBBPool.BBContainer;
import org.voltcore.utils.DBBPool.MBBContainer;
import org.voltcore.utils.PosixAdvise;
import org.voltdb.utils.BinaryDeque.OutputContainerFactory;
import org.xerial.snappy.Snappy;

/**
* Objects placed in the queue are stored in file segments that are up to 64 megabytes.
* Segments only support appending objects. offer() returns false if the object to be
* appended does not fit in the remaining space. A segment can be used
* for reading or for writing, but not both at the same time.
*
*/
class PBDSegment {
    private static final VoltLogger LOG = new VoltLogger("HOST");

    public static final int FLAG_COMPRESSED = 1;

    //Avoid unnecessary syncs with this flag
    private boolean m_syncedSinceLastEdit = true;
    final File m_file;
    private RandomAccessFile m_ras;
    private FileChannel m_fc;
    private MBBContainer m_buf;
    private ByteBuffer m_readBuf;

    //If this is the first time polling a segment, madvise the entire thing
    //into memory
    private boolean m_haveMAdvised = false;

    //Index of the next object to read, not an offset into the file
    //The offset is maintained by the ByteBuffer. Used to determine if there is another object
    int m_objectReadIndex = 0;
    private int m_bytesRead = 0;

    //ID of this segment
    final Long m_index;
    static final int m_chunkSize = (1024 * 1024) * 64;
    static final int m_objectHeaderBytes = 8;
    static final int m_segmentHeaderBytes = 8;

    static final int COUNT_OFFSET = 0;
    static final int SIZE_OFFSET = 4;
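
    /*
     * On-disk layout written by this class:
     *   Segment header (m_segmentHeaderBytes = 8):
     *     int at COUNT_OFFSET - number of objects stored in the segment
     *     int at SIZE_OFFSET  - total uncompressed size in bytes of those objects
     *   Each object (m_objectHeaderBytes = 8 of header, then the payload):
     *     int - length of the stored (possibly compressed) payload
     *     int - flags (FLAG_COMPRESSED if the payload is Snappy compressed)
     */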

    private boolean m_closed = false;

    //How many of the entries polled from this file have been discarded.
    //Convenient to let PBQ maintain the counter here
    int m_discardCount = 0;

    public PBDSegment(Long index, File file) {
        m_index = index;
        m_file = file;
        if (LOG.isDebugEnabled()) {
            LOG.debug("Creating Segment: " + file.getName() + " At Index: " + m_index);
        }
    }

    int getNumEntries() throws IOException {
        if (m_fc == null) {
            open(false);
        }
        if (m_fc.size() > m_segmentHeaderBytes) {
            return m_buf.b().getInt(COUNT_OFFSET);
        } else {
            return 0;
        }
    }

    private void initNumEntries() throws IOException {
        final ByteBuffer buf = m_buf.b();
        buf.putInt(COUNT_OFFSET, 0);
        buf.putInt(SIZE_OFFSET, 0);
        m_syncedSinceLastEdit = false;
    }

    private void incrementNumEntries(int size) throws IOException {
        final ByteBuffer buf = m_buf.b();
        //Read the current count and size, then write back the incremented values
        buf.putInt(COUNT_OFFSET, buf.getInt(COUNT_OFFSET) + 1);
        buf.putInt(SIZE_OFFSET, buf.getInt(SIZE_OFFSET) + size);
        m_syncedSinceLastEdit = false;
    }

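    /**
     * Open the segment's backing file and memory map it. Opened for writing, the full
     * chunk size is mapped read/write and the segment header is initialized; opened for
     * reading, the file is mapped read-only at its current size and the read position
     * is set just past the segment header. Throws if the segment is already open.
     */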
    void open(boolean forWrite) throws IOException {
        if (!m_file.exists()) {
            m_syncedSinceLastEdit = false;
        }
        if (m_ras != null) {
            throw new IOException(m_file + " was already opened");
        }
        m_ras = new RandomAccessFile(m_file, "rw");
        m_fc = m_ras.getChannel();

        if (forWrite) {
            //If this is for writing, map the chunk size RW and put the buf positions at the start
            m_buf = DBBPool.wrapMBB(m_fc.map(MapMode.READ_WRITE, 0, m_chunkSize));
            m_buf.b().position(SIZE_OFFSET + 4);
            m_readBuf = m_buf.b().duplicate();
            initNumEntries();
        } else {
            //If it isn't for writing, map read-only at the actual file size and put the write buf position at the end
            //so the size is reported correctly
            final long size = m_fc.size();
            m_buf = DBBPool.wrapMBB(m_fc.map(MapMode.READ_ONLY, 0, size));
            m_readBuf = m_buf.b().duplicate();
            m_buf.b().position((int) size);
            m_readBuf.position(SIZE_OFFSET + 4);
        }
    }

    public void closeAndDelete() throws IOException {
        close();
        if (LOG.isDebugEnabled()) {
            LOG.debug("Deleting segment at Index " + m_index + " File: " + m_file.getAbsolutePath());
        }
        m_file.delete();
    }

    public void close() throws IOException {
        try {
            if (m_fc != null) {
                m_fc.close();
                m_ras = null;
                m_fc = null;
                m_buf.discard();
                m_buf = null;
                m_readBuf = null;
            }
        } finally {
            m_closed = true;
        }
    }

    void sync() throws IOException {
        if (m_closed) throw new IOException("closed");
        if (!m_syncedSinceLastEdit) {
            m_buf.b().force();
        }
        m_syncedSinceLastEdit = true;
    }

    boolean hasMoreEntries() throws IOException {
        if (m_closed) throw new IOException("closed");
        return m_objectReadIndex < m_buf.b().getInt(COUNT_OFFSET);
    }

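    /**
     * Append an object to the segment. When compression is requested and the payload is
     * a direct buffer of at least 32 bytes, it is Snappy compressed straight into the
     * mapped file; otherwise it is copied as-is. Returns false, leaving the container
     * untouched, if the object might not fit in the remaining space; on a successful
     * write the container is discarded.
     */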
    boolean offer(BBContainer cont, boolean compress) throws IOException {
        if (m_closed) throw new IOException("closed");
        final ByteBuffer buf = cont.b();
        final int remaining = buf.remaining();
        if (remaining < 32 || !buf.isDirect()) compress = false;
        final int maxCompressedSize = compress ? Snappy.maxCompressedLength(remaining) : remaining;
        final ByteBuffer mbuf = m_buf.b();
        if (mbuf.remaining() < maxCompressedSize + m_objectHeaderBytes) return false;


        m_syncedSinceLastEdit = false;
        try {
            //Leave space for length prefix and flags
            final int objSizePosition = mbuf.position();
            mbuf.position(mbuf.position() + m_objectHeaderBytes);

            int written = maxCompressedSize;
            if (compress) {
                //Calculate destination pointer and compress directly to file
                final long destAddr = m_buf.address() + mbuf.position();
                written = (int)Snappy.rawCompress(cont.address() + buf.position(), remaining, destAddr);
                mbuf.position(mbuf.position() + written);
            } else {
                mbuf.put(buf);
            }

            //Record the stored size and whether the object was compressed,
            //then mark the source buffer as consumed
            mbuf.putInt(objSizePosition, written);
            mbuf.putInt(objSizePosition + 4, compress ? FLAG_COMPRESSED: 0);
            buf.position(buf.limit());
            incrementNumEntries(remaining);
        } finally {
            cont.discard();
        }

        return true;
    }

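    /**
     * Read the next object, or return null if every object has been read. On the first
     * poll the mapped region is madvised as WILLNEED. Compressed objects are decompressed
     * into a buffer obtained from the supplied factory; uncompressed objects are returned
     * as a slice of the memory-mapped file and are only valid while the segment stays open.
     */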
    BBContainer poll(OutputContainerFactory factory) throws IOException {
        if (m_closed) throw new IOException("closed");
        final long mBufAddr = m_buf.address();
        if (!m_haveMAdvised) {
            final ByteBuffer mbuf = m_buf.b();
            m_haveMAdvised = true;
            final long retval = PosixAdvise.madvise(
                    m_buf.address(),
                    mbuf.position(),
                    PosixAdvise.POSIX_MADV_WILLNEED);
            if (retval != 0) {
                LOG.warn("madvise WILLNEED failed: " + retval);
            }
        }

        //No more entries to read
        if (!hasMoreEntries()) {
            return null;
        }

        m_objectReadIndex++;

        //Get the length prefix and then read the object
        final int nextCompressedLength = m_readBuf.getInt();
        final int nextFlags = m_readBuf.getInt();

        //Check for compression
        final boolean compressed = nextFlags == FLAG_COMPRESSED;
        //Determine the length of the object if uncompressed
        final int nextUncompressedLength = compressed ? (int)Snappy.uncompressedLength(mBufAddr + m_readBuf.position(), nextCompressedLength) : nextCompressedLength;
        m_bytesRead += nextUncompressedLength;

        if (compressed) {
            //Get storage for output
            final BBContainer retcont = factory.getContainer(nextUncompressedLength);
            final ByteBuffer retbuf = retcont.b();

            //Limit to appropriate uncompressed size
            retbuf.limit(nextUncompressedLength);

            //Uncompress to output buffer
            final long sourceAddr = mBufAddr + m_readBuf.position();
            final long destAddr = retcont.address();
            Snappy.rawUncompress(sourceAddr, nextCompressedLength, destAddr);
            m_readBuf.position(m_readBuf.position() + nextCompressedLength);
            return retcont;
        } else {
            //Return a slice
            final int oldLimit = m_readBuf.limit();
            m_readBuf.limit(m_readBuf.position() + nextUncompressedLength);
            ByteBuffer retbuf = m_readBuf.slice();
            m_readBuf.position(m_readBuf.limit());
            m_readBuf.limit(oldLimit);

            /*
             * For uncompressed data, touch all the pages to make 100% sure
             * they are available since they will be accessed directly.
             *
             * This code mimics MappedByteBuffer.load, but without the expensive
             * madvise call for data we are 99% sure was already madvised.
             *
             * This would only ever be an issue in the unlikely event that the page cache
             * is trashed at the wrong moment or we are very low on memory
             */
            final BBContainer dummyCont = DBBPool.dummyWrapBB(retbuf);
            Bits.readEveryPage(dummyCont);
            return dummyCont;
        }
    }

    /*
     * Don't use size in bytes to determine emptiness; it could potentially
     * diverge from the object count on crash or power failure,
     * although that is incredibly unlikely
     */
    int sizeInBytes() {
        if (m_closed) throw new RuntimeException("closed");
        return Math.max(0, m_buf.b().getInt(SIZE_OFFSET) - m_bytesRead);
    }
}
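
/*
 * Minimal usage sketch (illustrative only, not part of the original source). This
 * package-private class is normally driven by the owning BinaryDeque implementation;
 * the directory, buffer, and factory names below are hypothetical.
 *
 *   // Write side: one instance maps the file read/write and appends objects
 *   PBDSegment writer = new PBDSegment(0L, new File(dir, "queue.0.pbd"));
 *   writer.open(true);
 *   writer.offer(DBBPool.dummyWrapBB(someDirectByteBuffer), true); // may Snappy-compress
 *   writer.sync();
 *   writer.close();
 *
 *   // Read side: a separate instance maps the same file read-only and drains it
 *   PBDSegment reader = new PBDSegment(0L, new File(dir, "queue.0.pbd"));
 *   reader.open(false);
 *   while (reader.hasMoreEntries()) {
 *       BBContainer cont = reader.poll(outputContainerFactory);
 *       try {
 *           // consume cont.b()
 *       } finally {
 *           cont.discard();
 *       }
 *   }
 *   reader.close();
 */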