Package org.broadinstitute.gatk.engine.datasources.reads

Source Code of org.broadinstitute.gatk.engine.datasources.reads.BlockInputStream

/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package org.broadinstitute.gatk.engine.datasources.reads;

import htsjdk.samtools.GATKBAMFileSpan;
import htsjdk.samtools.GATKChunk;
import htsjdk.samtools.util.BlockCompressedInputStream;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;

/**
* Presents decompressed blocks to the SAMFileReader.
*/
public class BlockInputStream extends InputStream {
    /**
     * Mechanism for triggering block loads.
     */
    private final BGZFBlockLoadingDispatcher dispatcher;

    /**
     * The reader whose data is supplied by this input stream.
     */
    private final SAMReaderID reader;

    /**
     * Length of the input stream.
     */
    private final long length;

    /**
     * The latest error reported by an asynchronous block load.
     */
    private Throwable error;

    /**
     * Current accessPlan.
     */
    private BAMAccessPlan accessPlan;

    /**
     * A stream of compressed data blocks.
     */
    private final ByteBuffer buffer;

    /**
     * Offsets of the given blocks in the buffer.
     */
    private LinkedList<Integer> blockOffsets = new LinkedList<Integer>();

    /**
     * Source positions of the given blocks in the buffer.
     */
    private LinkedList<Long> blockPositions = new LinkedList<Long>();

    /**
     * Provides a lock to wait for more data to arrive.
     */
    private final Object lock = new Object();

    /**
     * An input stream to use when comparing data back to what it should look like.
     */
    private final BlockCompressedInputStream validatingInputStream;

    /**
     * Create a new block presenting input stream with a dedicated buffer.
     * @param dispatcher the block loading messenger.
     * @param reader the reader for which to load data.
     * @param validate whether to validate the contents read into the buffer against a Picard BlockCompressedInputStream.
     */
    BlockInputStream(final BGZFBlockLoadingDispatcher dispatcher, final SAMReaderID reader, final boolean validate) {
        this.reader = reader;
        this.length = reader.samFile.length();

        buffer = ByteBuffer.wrap(new byte[64*1024]);
        buffer.order(ByteOrder.LITTLE_ENDIAN);

        // The state of the buffer assumes that the range of data written into the buffer appears in the range
        // [position,limit), while extra capacity exists in the range [limit,capacity)
        buffer.limit(0);
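        // (Immediately after limit(0), position == limit == 0: readers see no data,
        // while copyIntoBuffer() later reopens [limit,capacity) for writing by
        // calling buffer.limit(buffer.capacity()).)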

        this.dispatcher = dispatcher;
        // TODO: Kill the region when all we want to do is start at the beginning of the stream and run to the end of the stream.
        this.accessPlan = new BAMAccessPlan(reader,this,new GATKBAMFileSpan(new GATKChunk(0,Long.MAX_VALUE)));

        // The block offsets / block positions guarantee that the ending offset/position in the data structure maps to
     * the point in the file just following the last read.  These two lists should never be empty; initializing
        // to 0 to match the position above.
        this.blockOffsets.add(0);
        this.blockPositions.add(0L);

        try {
            if(validate) {
                System.out.printf("BlockInputStream %s: BGZF block validation mode activated%n",this);
                validatingInputStream = new BlockCompressedInputStream(reader.samFile);
                // A bug in BlockCompressedInputStream means that calling getFilePointer() immediately after initialization will result in an NPE.
                // Poke the stream to start reading data.
                validatingInputStream.available();
            }
            else
                validatingInputStream = null;
        }
        catch(IOException ex) {
            throw new ReviewedGATKException("Unable to validate against Picard input stream",ex);
        }
    }

    public long length() {
        return length;
    }

    public long getFilePointer() {
        long filePointer;
        synchronized(lock) {
            // Find the current block within the input stream.
            int blockIndex;
            for(blockIndex = 0; blockIndex+1 < blockOffsets.size() && buffer.position() > blockOffsets.get(blockIndex+1); blockIndex++)
                ;
            filePointer = blockPositions.get(blockIndex) + (buffer.position()-blockOffsets.get(blockIndex));
        }

//        if(validatingInputStream != null && filePointer != validatingInputStream.getFilePointer())
//            throw new ReviewedGATKException(String.format("Position of input stream is invalid; expected (block address, block offset) = (%d,%d), got (%d,%d)",
//                    BlockCompressedFilePointerUtil.getBlockAddress(validatingInputStream.getFilePointer()),BlockCompressedFilePointerUtil.getBlockOffset(validatingInputStream.getFilePointer()),
//                    BlockCompressedFilePointerUtil.getBlockAddress(filePointer),BlockCompressedFilePointerUtil.getBlockOffset(filePointer)));

        return filePointer;
    }
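
    // Illustrative example of the pointer arithmetic above (hypothetical values):
    // if blockPositions.get(blockIndex) encodes (blockAddress=1234, blockOffset=0)
    // and buffer.position() is 10 bytes past blockOffsets.get(blockIndex), the
    // returned value encodes (blockAddress=1234, blockOffset=10), because the low
    // 16 bits of a virtual file pointer hold the within-block offset (see
    // makeFilePointer below).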

    private void clearBuffers() {
        this.accessPlan.reset();

        // Buffer semantics say that outside of a lock, buffer should always be prepared for reading.
        // Indicate no data to be read.
        buffer.clear();
        buffer.limit(0);

        // Reset the block offsets to a single zero entry; keep only the last block position.
        blockOffsets.clear();
        blockOffsets.add(0);
        while(blockPositions.size() > 1)
            blockPositions.removeFirst();
    }

    public boolean eof() {
        synchronized(lock) {
            // TODO: Handle multiple empty BGZF blocks at end of the file.
            return accessPlan != null && (accessPlan.getBlockAddress() < 0 || accessPlan.getBlockAddress() >= length);
        }
    }

    /**
     * Submits a new access plan for the given dataset and seeks to the given point.
     * @param accessPlan The next seek point for BAM data in this reader.
     */
    public void submitAccessPlan(final BAMAccessPlan accessPlan) {
        //System.out.printf("Thread %s: submitting access plan for block at position: %d%n",Thread.currentThread().getId(),position.getBlockAddress());
        this.accessPlan = accessPlan;
        accessPlan.reset();

        clearBuffers();

        // Pull the iterator past any oddball chunks at the beginning of the shard (chunkEnd < chunkStart, empty chunks, etc).
        // TODO: Don't pass these empty chunks in.
        accessPlan.advancePosition(makeFilePointer(accessPlan.getBlockAddress(),0));

        if(accessPlan.getBlockAddress() >= 0) {
            waitForBufferFill();
        }

        if(validatingInputStream != null) {
            try {
                validatingInputStream.seek(makeFilePointer(accessPlan.getBlockAddress(),0));
            }
            catch(IOException ex) {
                throw new ReviewedGATKException("Unable to validate against Picard input stream",ex);
            }
        }

    }


    private void compactBuffer() {
        // Compact buffer to maximize storage space.
        int bytesToRemove = 0;

        // Look ahead to see if we can compact away the first blocks in the series.
        while(blockOffsets.size() > 1 && buffer.position() >= blockOffsets.get(1)) {
            blockOffsets.remove();
            blockPositions.remove();
            bytesToRemove = blockOffsets.peek();
        }

        // If we end up with an empty block at the end of the series, compact this as well.
        if(buffer.remaining() == 0 && blockOffsets.size() > 1 && buffer.position() >= blockOffsets.peek()) {
            bytesToRemove += buffer.position();
            blockOffsets.remove();
            blockPositions.remove();
        }

        int finalBufferStart = buffer.position() - bytesToRemove;
        int finalBufferSize = buffer.remaining();

        // Position the buffer to remove the unneeded data, and compact it away.
        buffer.position(bytesToRemove);
        buffer.compact();

        // Reset the limits for reading.
        buffer.position(finalBufferStart);
        buffer.limit(finalBufferStart+finalBufferSize);

        // Shift everything in the offset buffer down to accommodate the bytes removed from the buffer.
        for(int i = 0; i < blockOffsets.size(); i++)
            blockOffsets.set(i,blockOffsets.get(i)-bytesToRemove);
    }
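
    // Illustrative walk-through of compactBuffer() (hypothetical values): with
    // blockOffsets = [0, 100, 200] and buffer.position() = 150, the fully-consumed
    // first block (bytes [0,100)) is discarded: bytesToRemove becomes 100, the
    // surviving bytes shift down by 100, and blockOffsets becomes [0, 100].
    // Already-read bytes inside the current, partially-consumed block are kept so
    // the offset arithmetic in getFilePointer() remains valid.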

    /**
     * Push contents of incomingBuffer into the end of this buffer.
     * MUST be called from a thread that is NOT the reader thread.
     * @param incomingBuffer The data being pushed into this input stream.
     * @param accessPlan target access plan for the data.
     * @param filePosition the current position of the file pointer
     */
    public void copyIntoBuffer(final ByteBuffer incomingBuffer, final BAMAccessPlan accessPlan, final long filePosition) {
        synchronized(lock) {
            try {
                if(validatingInputStream != null) {
                    byte[] validBytes = new byte[incomingBuffer.remaining()];

                    byte[] currentBytes = new byte[incomingBuffer.remaining()];
                    int pos = incomingBuffer.position();
                    int lim = incomingBuffer.limit();
                    incomingBuffer.get(currentBytes);

                    incomingBuffer.limit(lim);
                    incomingBuffer.position(pos);

                    long currentFilePointer = validatingInputStream.getFilePointer();
                    validatingInputStream.seek(makeFilePointer(accessPlan.getBlockAddress(), 0));
                    validatingInputStream.read(validBytes);
                    validatingInputStream.seek(currentFilePointer);

                    if(!Arrays.equals(validBytes,currentBytes))
                        throw new ReviewedGATKException(String.format("Bytes being inserted into BlockInputStream %s are incorrect",this));
                }

                compactBuffer();
                // Open up the buffer for more reading.
                buffer.limit(buffer.capacity());

                // Get the spans overlapping this particular block...
                List<GATKChunk> spansOverlapping = accessPlan.getSpansOverlappingBlock(accessPlan.getBlockAddress(),filePosition);

                // ...and advance the block
                this.accessPlan = accessPlan;
                accessPlan.advancePosition(makeFilePointer(filePosition, 0));

                if(buffer.remaining() < incomingBuffer.remaining())
                    lock.wait();

                final int bytesInIncomingBuffer = incomingBuffer.limit();

                for(GATKChunk spanOverlapping: spansOverlapping) {
                    // Clear out the endcap tracking state and add in the starting position for this transfer.
                    blockOffsets.removeLast();
                    blockOffsets.add(buffer.position());
                    blockPositions.removeLast();
                    blockPositions.add(spanOverlapping.getChunkStart());

                    // Stream the buffer into the data stream.
                    incomingBuffer.limit((spanOverlapping.getBlockEnd() > spanOverlapping.getBlockStart()) ? bytesInIncomingBuffer : spanOverlapping.getBlockOffsetEnd());
                    incomingBuffer.position(spanOverlapping.getBlockOffsetStart());
                    buffer.put(incomingBuffer);

                    // Add the endcap for this transfer.
                    blockOffsets.add(buffer.position());
                    blockPositions.add(spanOverlapping.getChunkEnd());
                }

                // Set up the buffer for reading.
                buffer.flip();

                lock.notify();
            }
            catch(Exception ex) {
                reportException(ex);
                lock.notify();
            }
        }
    }
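
    // The handoff protocol in brief: a dispatcher thread calls copyIntoBuffer()
    // under 'lock', waiting via lock.wait() when the buffer lacks space; the
    // reader thread drains bytes in read() and calls lock.notify() to wake the
    // producer. Asynchronous load failures travel through reportException() and
    // surface on the reader thread via checkForErrors().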

    void reportException(Throwable t) {
        synchronized(lock) {
            this.error = t;
            lock.notify();
        }
    }

    private void checkForErrors() {
        synchronized(lock) {
            if(error != null) {
                ReviewedGATKException toThrow = new ReviewedGATKException(String.format("Thread %s, BlockInputStream %s: Unable to retrieve BAM data from disk",Thread.currentThread().getId(),this),error);
                toThrow.setStackTrace(error.getStackTrace());
                throw toThrow;
            }
        }
    }

    /**
     * Reads the next byte of data from the input stream.
     * @return Next byte of data, from 0 to 255, as an int, or -1 if the end of the stream has been reached.
     */
    @Override
    public int read() {
        byte[] singleByte = new byte[1];
        // Mask to an unsigned value so bytes >= 0x80 are not returned as negative
        // ints, and propagate end-of-stream as -1 per the InputStream contract.
        if(read(singleByte) <= 0)
            return -1;
        return singleByte[0] & 0xFF;
    }

    /**
     * Fills the given byte array to the extent possible.
     * @param bytes byte array to be filled.
     * @return The number of bytes actually read, or -1 if the end of the stream has been reached.
     */
    @Override
    public int read(byte[] bytes) {
        return read(bytes,0,bytes.length);
    }

    @Override
    public int read(byte[] bytes, final int offset, final int length) {
        int remaining = length;
        synchronized(lock) {
            while(remaining > 0) {
                // Check for error conditions during last read.
                checkForErrors();

                // If completely out of space, queue up another buffer fill.
                waitForBufferFill();

                // Couldn't manage to load any data at all; abort and return what's available.
                if(buffer.remaining() == 0)
                    break;

                int numBytesToCopy = Math.min(buffer.remaining(),remaining);
                buffer.get(bytes,length-remaining+offset,numBytesToCopy);
                remaining -= numBytesToCopy;

                //if(remaining > 0)
                //    System.out.printf("Thread %s: read the first %d bytes of a %d byte request%n",Thread.currentThread().getId(),length-remaining,length);
                // TODO: Assert that we don't copy across a block boundary
            }

            // Notify any waiting threads that some of the contents of the buffer were removed.
            if(length-remaining > 0)
                lock.notify();
        }

//        if(validatingInputStream != null) {
//            byte[] validBytes = new byte[length];
//            try {
//                validatingInputStream.read(validBytes,offset,length);
//                for(int i = offset; i < offset+length; i++) {
//                    if(bytes[i] != validBytes[i])
//                        throw new ReviewedGATKException(String.format("Thread %s: blockInputStream %s attempting to return wrong set of bytes; mismatch at offset %d",Thread.currentThread().getId(),this,i));
//                }
//            }
//            catch(IOException ex) {
//                throw new ReviewedGATKException("Unable to validate against Picard input stream",ex);
//            }
//        }

        // If any data was copied into the buffer, return the amount of data copied.
        if(remaining < length)
            return length - remaining;

        // Otherwise, return -1.
        return -1;
    }

    public void close() {
        if(validatingInputStream != null) {
            try {
                validatingInputStream.close();
            }
            catch(IOException ex) {
                throw new ReviewedGATKException("Unable to validate against Picard input stream",ex);
            }
        }
    }

    public String getSource() {
        return reader.getSamFilePath();
    }

    private void waitForBufferFill() {
        synchronized(lock) {
            if(buffer.remaining() == 0 && !eof()) {
                //System.out.printf("Thread %s is waiting for a buffer fill from position %d to buffer %s%n",Thread.currentThread().getId(),position.getBlockAddress(),this);
                dispatcher.queueBlockLoad(accessPlan);
                try {
                    lock.wait();
                }
                catch(InterruptedException ex) {
                    throw new ReviewedGATKException("Interrupt occurred waiting for buffer to fill",ex);
                }
            }
        }
    }

    /**
     * Create an encoded BAM file pointer given the address of a BGZF block and an offset.
     * @param blockAddress Physical address on disk of a BGZF block.
     * @param blockOffset Offset into the uncompressed data stored in the BGZF block.
     * @return 64-bit pointer encoded according to the BAM spec.
     */
    public static long makeFilePointer(final long blockAddress, final int blockOffset) {
        return blockAddress << 16 | blockOffset;
    }
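
    // Worked example: makeFilePointer(1234L, 56) == (1234L << 16) | 56 == 80871480.
    // The inverse mapping is blockAddress = pointer >> 16 and
    // blockOffset = (int)(pointer & 0xFFFF), matching htsjdk's
    // BlockCompressedFilePointerUtil encoding.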
}
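
For reference, a minimal standalone sketch (a hypothetical demo class, not part of the GATK source) showing that the blockAddress << 16 | blockOffset encoding above round-trips through htsjdk's BlockCompressedFilePointerUtil:

import htsjdk.samtools.util.BlockCompressedFilePointerUtil;

public class FilePointerDemo {
    public static void main(String[] args) {
        final long blockAddress = 1234L; // compressed (on-disk) offset of a BGZF block
        final int blockOffset = 56;      // offset into the block's uncompressed data

        // Same encoding as BlockInputStream.makeFilePointer.
        final long pointer = blockAddress << 16 | blockOffset;

        // htsjdk encodes virtual file pointers identically, so the round trip holds.
        assert pointer == BlockCompressedFilePointerUtil.makeFilePointer(blockAddress, blockOffset);
        assert BlockCompressedFilePointerUtil.getBlockAddress(pointer) == blockAddress;
        assert BlockCompressedFilePointerUtil.getBlockOffset(pointer) == blockOffset;

        System.out.printf("pointer=%d -> (blockAddress=%d, blockOffset=%d)%n",
                pointer, blockAddress, blockOffset);
    }
}

(Run with java -ea to enable the assertions.)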