Package org.broadinstitute.gatk.engine.datasources.providers

Source Code of org.broadinstitute.gatk.engine.datasources.providers.IntervalOverlappingRODsFromStream

/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package org.broadinstitute.gatk.engine.datasources.providers;

import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import htsjdk.samtools.util.PeekableIterator;
import org.broadinstitute.gatk.engine.refdata.RODRecordListImpl;
import org.broadinstitute.gatk.engine.refdata.utils.GATKFeature;
import org.broadinstitute.gatk.engine.refdata.utils.RODRecordList;
import org.broadinstitute.gatk.utils.GenomeLoc;

import java.util.Collection;
import java.util.LinkedList;
import java.util.ListIterator;

/**
* Key algorithmic helper for ReadBasedReferenceOrderedData
*
* Takes a single iterator of features, and provides a single capability that returns
* the list of RODs that overlap an interval.  Allows sequential getOverlapping calls
* from intervals provided that these intervals always have increasing getStart() values.
*
*/
class IntervalOverlappingRODsFromStream {
    /**
     * Only held for QC purposes
     */
    GenomeLoc lastQuery = null;

    private final String name;
    private final LinkedList<GATKFeature> currentFeatures = new LinkedList<GATKFeature>();
    private final PeekableIterator<RODRecordList> futureFeatures;

    /**
     * Create a new IntervalOverlappingRODsFromStream that reads elements from futureFeatures and
     * returns RODRecordLists having name
     *
     * @param name
     * @param futureFeatures
     */
    IntervalOverlappingRODsFromStream(final String name, final PeekableIterator<RODRecordList> futureFeatures) {
        if ( futureFeatures == null ) throw new IllegalArgumentException("futureFeatures cannot be null");

        this.name = name;
        this.futureFeatures = futureFeatures;
    }

    /**
     * Get the list of RODs overlapping loc from this stream of RODs.
     *
     * @param loc the interval to query
     * @return a non-null RODRecordList containing the overlapping RODs, which may be empty
     */
    @Ensures({"overlaps(loc, result)",
            "! futureFeatures.hasNext() || futureFeatures.peek().getLocation().isPast(loc)",
            "result != null"})
    public RODRecordList getOverlapping(final GenomeLoc loc) {
        if ( lastQuery != null && loc.getStart() < lastQuery.getStart() )
            throw new IllegalArgumentException(String.format("BUG: query interval (%s) starts before the previous interval %s", loc, lastQuery));

        readOverlappingFutureFeatures(loc);
        return new RODRecordListImpl(name, subsetToOverlapping(loc, currentFeatures), loc);
    }


    /**
     * For contract assurance.  Checks that all bindings in loc overlap
     *
     * @param loc
     * @param bindings
     * @return
     */
    @Requires({"loc != null", "bindings != null"})
    private boolean overlaps(final GenomeLoc loc, final RODRecordList bindings) {
        for ( final GATKFeature feature : bindings )
            if ( ! feature.getLocation().overlapsP(loc) )
                return false;
        return true;
    }

    /**
     * Subset the features in all to those that overlap with loc
     *
     * The current features list contains everything read that cannot be thrown away yet, but not
     * everything in there necessarily overlaps with loc.  Subset to just those that do overlap
     *
     * @param loc the location that features must overlap
     * @param all the list of all features
     * @return a subset of all that overlaps with loc
     */
    @Requires({"loc != null", "all != null"})
    @Ensures("result.size() <= all.size()")
    private Collection<GATKFeature> subsetToOverlapping(final GenomeLoc loc, final Collection<GATKFeature> all) {
        final LinkedList<GATKFeature> overlapping = new LinkedList<GATKFeature>();
        for ( final GATKFeature feature : all )
            if ( feature.getLocation().overlapsP(loc) )
                overlapping.add(feature);
        return overlapping;
    }

    /**
     * Update function.  Remove all elements of currentFeatures that end before loc
     *
     * Must be called by clients periodically when they know they they will never ask for data before
     * loc, so that the running cache of RODs doesn't grow out of control.
     *
     * @param loc the location to use
     */
    @Requires("loc != null")
    @Ensures("currentFeatures.size() <= old(currentFeatures.size())")
    public void trimCurrentFeaturesToLoc(final GenomeLoc loc) {
        final ListIterator<GATKFeature> it = currentFeatures.listIterator();
        while ( it.hasNext() ) {
            final GATKFeature feature = it.next();
            if ( feature.getLocation().isBefore(loc) )
                it.remove();
        }
    }

    /**
     * Update function: Read all elements from futureFeatures that overlap with loc
     *
     * Stops at the first element that starts before the end of loc, or the stream empties
     *
     * @param loc
     */
    @Requires("loc != null")
    @Ensures("currentFeatures.size() >= old(currentFeatures.size())")
    private void readOverlappingFutureFeatures(final GenomeLoc loc) {
        while ( futureFeatures.hasNext() ) {
            final GenomeLoc nextLoc = futureFeatures.peek().getLocation();
            if ( nextLoc.isBefore(loc) ) {
                futureFeatures.next(); // next rod element is before loc, throw it away and keep looking
            } else if ( nextLoc.isPast(loc) ) {
                break; // next element is past loc, stop looking but don't pop it
            } else if ( nextLoc.overlapsP(loc) ) {
                // add overlapping elements to our current features, removing from stream
                for ( final GATKFeature feature : futureFeatures.next() ) {
                    currentFeatures.add(feature);
                }
            }
        }
    }
}
TOP

Related Classes of org.broadinstitute.gatk.engine.datasources.providers.IntervalOverlappingRODsFromStream

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.