Package org.broadinstitute.gatk.engine.datasources.providers

Source Code of org.broadinstitute.gatk.engine.datasources.providers.IntervalReferenceOrderedViewUnitTest$CompareFeatures

/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package org.broadinstitute.gatk.engine.datasources.providers;

import htsjdk.samtools.util.PeekableIterator;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.tribble.BasicFeature;
import htsjdk.tribble.Feature;
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.commandline.RodBinding;
import org.broadinstitute.gatk.engine.refdata.RODRecordListImpl;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.engine.refdata.utils.GATKFeature;
import org.broadinstitute.gatk.engine.refdata.utils.RODRecordList;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.Utils;
import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.util.*;

/**
* @author depristo
*/
public class IntervalReferenceOrderedViewUnitTest extends BaseTest {
    private static int startingChr = 1;
    private static int endingChr = 2;
    private static int readCount = 100;
    private static int DEFAULT_READ_LENGTH = ArtificialSAMUtils.DEFAULT_READ_LENGTH;
    private static String contig;
    private static SAMFileHeader header;

    private GenomeLocParser genomeLocParser;

    @BeforeClass
    public void beforeClass() {
        header = ArtificialSAMUtils.createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
        contig = header.getSequence(0).getSequenceName();
        genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());

        initializeTests();
    }

    private class CompareFeatures implements Comparator<Feature> {
        @Override
        public int compare(Feature o1, Feature o2) {
            return genomeLocParser.createGenomeLoc(o1).compareTo(genomeLocParser.createGenomeLoc(o2));
        }
    }

    private class ReadMetaDataTrackerRODStreamTest extends TestDataProvider {
        final List<Feature> allFeatures;
        final List<GenomeLoc> intervals;

        public ReadMetaDataTrackerRODStreamTest(final List<Feature> allFeatures, final GenomeLoc interval) {
            this(allFeatures, Collections.singletonList(interval));
        }

        public ReadMetaDataTrackerRODStreamTest(final List<Feature> allFeatures, final List<GenomeLoc> intervals) {
            super(ReadMetaDataTrackerRODStreamTest.class);
            this.allFeatures = new ArrayList<Feature>(allFeatures);
            Collections.sort(this.allFeatures, new CompareFeatures());
            this.intervals = new ArrayList<GenomeLoc>(intervals);
            Collections.sort(this.intervals);
            setName(String.format("%s nFeatures %d intervals %s", getClass().getSimpleName(), allFeatures.size(),
                    intervals.size() == 1 ? intervals.get(0) : "size " + intervals.size()));
        }

        public PeekableIterator<RODRecordList> getIterator(final String name) {
            return new PeekableIterator<RODRecordList>(new TribbleIteratorFromCollection(name, genomeLocParser, allFeatures));
        }

        public Set<Feature> getExpectedOverlaps(final GenomeLoc interval) {
            final Set<Feature> overlapping = new HashSet<Feature>();
            for ( final Feature f : allFeatures )
                if ( genomeLocParser.createGenomeLoc(f).overlapsP(interval) )
                    overlapping.add(f);
            return overlapping;
        }
    }

    public void initializeTests() {
        final List<Feature> handPickedFeatures = new ArrayList<Feature>();

        handPickedFeatures.add(new BasicFeature(contig, 1, 1));
        handPickedFeatures.add(new BasicFeature(contig, 2, 5));
        handPickedFeatures.add(new BasicFeature(contig, 4, 4));
        handPickedFeatures.add(new BasicFeature(contig, 6, 6));
        handPickedFeatures.add(new BasicFeature(contig, 9, 10));
        handPickedFeatures.add(new BasicFeature(contig, 10, 10));
        handPickedFeatures.add(new BasicFeature(contig, 10, 11));
        handPickedFeatures.add(new BasicFeature(contig, 13, 20));

        createTestsForFeatures(handPickedFeatures);

        // test in the present of a large spanning element
        {
            List<Feature> oneLargeSpan = new ArrayList<Feature>(handPickedFeatures);
            oneLargeSpan.add(new BasicFeature(contig, 1, 30));
            createTestsForFeatures(oneLargeSpan);
        }

        // test in the presence of a partially spanning element
        {
            List<Feature> partialSpanStart = new ArrayList<Feature>(handPickedFeatures);
            partialSpanStart.add(new BasicFeature(contig, 1, 6));
            createTestsForFeatures(partialSpanStart);
        }

        // test in the presence of a partially spanning element at the end
        {
            List<Feature> partialSpanEnd = new ArrayList<Feature>(handPickedFeatures);
            partialSpanEnd.add(new BasicFeature(contig, 10, 30));
            createTestsForFeatures(partialSpanEnd);
        }

        // no data at all
        final GenomeLoc loc = genomeLocParser.createGenomeLoc(contig, 5, 5);
        new ReadMetaDataTrackerRODStreamTest(Collections.<Feature>emptyList(), loc);
    }

    // --------------------------------------------------------------------------------
    //
    // tests for the lower level IntervalOverlappingRODsFromStream
    //
    // --------------------------------------------------------------------------------

    @DataProvider(name = "ReadMetaDataTrackerRODStreamTest")
    public Object[][] createReadMetaDataTrackerRODStreamTest() {
        return ReadMetaDataTrackerRODStreamTest.getTests(ReadMetaDataTrackerRODStreamTest.class);
    }

    private GenomeLoc span(final List<GenomeLoc> features) {
        int featuresStart = 1; for ( final GenomeLoc f : features ) featuresStart = Math.min(featuresStart, f.getStart());
        int featuresStop = 1; for ( final GenomeLoc f : features ) featuresStop = Math.max(featuresStop, f.getStop());
        return genomeLocParser.createGenomeLoc(contig, featuresStart, featuresStop);
    }

    private void createTestsForFeatures(final List<Feature> features) {
        int featuresStart = 1; for ( final Feature f : features ) featuresStart = Math.min(featuresStart, f.getStart());
        int featuresStop = 1; for ( final Feature f : features ) featuresStop = Math.max(featuresStop, f.getEnd());

        for ( final int size : Arrays.asList(1, 5, 10, 100) ) {
            final List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>();
            // regularly spaced
            for ( int start = featuresStart; start < featuresStop; start++) {
                final GenomeLoc loc = genomeLocParser.createGenomeLoc(contig, start, start + size - 1);
                allIntervals.add(loc);
                new ReadMetaDataTrackerRODStreamTest(features, loc);
            }

            // starting and stopping at every feature
            for ( final Feature f : features ) {
                // just at the feature
                allIntervals.add(genomeLocParser.createGenomeLoc(contig, f.getStart(), f.getEnd()));
                new ReadMetaDataTrackerRODStreamTest(features, allIntervals.get(allIntervals.size() - 1));

                // up to end
                allIntervals.add(genomeLocParser.createGenomeLoc(contig, f.getStart() - 1, f.getEnd()));
                new ReadMetaDataTrackerRODStreamTest(features, allIntervals.get(allIntervals.size() - 1));

                // missing by 1
                allIntervals.add(genomeLocParser.createGenomeLoc(contig, f.getStart() + 1, f.getEnd() + 1));
                new ReadMetaDataTrackerRODStreamTest(features, allIntervals.get(allIntervals.size() - 1));

                // just spanning
                allIntervals.add(genomeLocParser.createGenomeLoc(contig, f.getStart() - 1, f.getEnd() + 1));
                new ReadMetaDataTrackerRODStreamTest(features, allIntervals.get(allIntervals.size() - 1));
            }

            new ReadMetaDataTrackerRODStreamTest(features, allIntervals);
        }
    }

    @Test(enabled = true, dataProvider = "ReadMetaDataTrackerRODStreamTest")
    public void runReadMetaDataTrackerRODStreamTest_singleQuery(final ReadMetaDataTrackerRODStreamTest data) {
        if ( data.intervals.size() == 1 ) {
            final String name = "testName";
            final PeekableIterator<RODRecordList> iterator = data.getIterator(name);
            final IntervalOverlappingRODsFromStream stream = new IntervalOverlappingRODsFromStream(name, iterator);
            testRODStream(data, stream, Collections.singletonList(data.intervals.get(0)));
        }
    }

    @Test(enabled = true, dataProvider = "ReadMetaDataTrackerRODStreamTest", dependsOnMethods = "runReadMetaDataTrackerRODStreamTest_singleQuery")
    public void runReadMetaDataTrackerRODStreamTest_multipleQueries(final ReadMetaDataTrackerRODStreamTest data) {
        if ( data.intervals.size() > 1 ) {
            final String name = "testName";
            final PeekableIterator<RODRecordList> iterator = data.getIterator(name);
            final IntervalOverlappingRODsFromStream stream = new IntervalOverlappingRODsFromStream(name, iterator);
            testRODStream(data, stream, data.intervals);
        }
    }

    private void testRODStream(final ReadMetaDataTrackerRODStreamTest test, final IntervalOverlappingRODsFromStream stream, final List<GenomeLoc> intervals) {
        for ( final GenomeLoc interval : intervals ) {
            final RODRecordList query = stream.getOverlapping(interval);
            final HashSet<Feature> queryFeatures = new HashSet<Feature>();
            for ( final GATKFeature f : query ) queryFeatures.add((Feature)f.getUnderlyingObject());
            final Set<Feature> overlaps = test.getExpectedOverlaps(interval);

            Assert.assertEquals(queryFeatures.size(), overlaps.size(), "IntervalOverlappingRODsFromStream didn't return the expected set of overlapping features." +
                    " Expected size = " + overlaps.size() + " but saw " + queryFeatures.size());

            BaseTest.assertEqualsSet(queryFeatures, overlaps, "IntervalOverlappingRODsFromStream didn't return the expected set of overlapping features." +
                    " Expected = " + Utils.join(",", overlaps) + " but saw " + Utils.join(",", queryFeatures));
        }
    }

    // --------------------------------------------------------------------------------
    //
    // tests for the higher level tracker itself
    //
    // --------------------------------------------------------------------------------

    @DataProvider(name = "ReadMetaDataTrackerTests")
    public Object[][] createTrackerTests() {
        List<Object[]> tests = new ArrayList<Object[]>();

        final Object[][] singleTests = ReadMetaDataTrackerRODStreamTest.getTests(ReadMetaDataTrackerRODStreamTest.class);
        final List<ReadMetaDataTrackerRODStreamTest> multiSiteTests = new ArrayList<ReadMetaDataTrackerRODStreamTest>();
        for ( final Object[] singleTest : singleTests ) {
            if ( ((ReadMetaDataTrackerRODStreamTest)singleTest[0]).intervals.size() > 1 )
                multiSiteTests.add((ReadMetaDataTrackerRODStreamTest)singleTest[0]);
        }

        for ( final boolean testStateless : Arrays.asList(true, false) ) {
            // all pairwise tests
            for ( List<ReadMetaDataTrackerRODStreamTest> singleTest : Utils.makePermutations(multiSiteTests, 2, false)) {
                tests.add(new Object[]{singleTest, testStateless});
            }

            // all 3 way pairwise tests
            //for ( List<ReadMetaDataTrackerRODStreamTest> singleTest : Utils.makePermutations(multiSiteTests, 3, false)) {
            //    tests.add(new Object[]{singleTest, testStateless});
            //}
        }

        logger.warn("Creating " + tests.size() + " tests for ReadMetaDataTrackerTests");
        return tests.toArray(new Object[][]{});
    }

    @Test(enabled = true, dataProvider = "ReadMetaDataTrackerTests", dependsOnMethods = "runReadMetaDataTrackerRODStreamTest_multipleQueries")
    public void runReadMetaDataTrackerTest(final List<ReadMetaDataTrackerRODStreamTest> RODs, final boolean testStateless) {
        final List<String> names = new ArrayList<String>();
        final List<PeekableIterator<RODRecordList>> iterators = new ArrayList<PeekableIterator<RODRecordList>>();
        final List<GenomeLoc> intervals = new ArrayList<GenomeLoc>();
        final List<RodBinding<Feature>> rodBindings = new ArrayList<RodBinding<Feature>>();

        for ( int i = 0; i < RODs.size(); i++ ) {
            final RodBinding<Feature> rodBinding = new RodBinding<Feature>(Feature.class, "name"+i);
            rodBindings.add(rodBinding);
            final String name = rodBinding.getName();
            names.add(name);
            iterators.add(RODs.get(i).getIterator(name));
            intervals.addAll(RODs.get(i).intervals);
        }

        Collections.sort(intervals);
        final GenomeLoc span = span(intervals);
        final IntervalReferenceOrderedView view = new IntervalReferenceOrderedView(genomeLocParser, span, names, iterators);

        if ( testStateless ) {
            // test each tracker is well formed, as each is created
            for ( final GenomeLoc interval : intervals ) {
                final RefMetaDataTracker tracker = view.getReferenceOrderedDataForInterval(interval);
                testMetaDataTrackerBindings(tracker, interval, RODs, rodBindings);
            }
        } else {
            // tests all trackers are correct after reading them into an array
            // this checks that the trackers are be safely stored away and analyzed later (critical for nano-scheduling)
            final List<RefMetaDataTracker> trackers = new ArrayList<RefMetaDataTracker>();
            for ( final GenomeLoc interval : intervals ) {
                final RefMetaDataTracker tracker = view.getReferenceOrderedDataForInterval(interval);
                trackers.add(tracker);
            }

            for ( int i = 0; i < trackers.size(); i++) {
                testMetaDataTrackerBindings(trackers.get(i), intervals.get(i), RODs, rodBindings);
            }
        }
    }

    private void testMetaDataTrackerBindings(final RefMetaDataTracker tracker,
                                             final GenomeLoc interval,
                                             final List<ReadMetaDataTrackerRODStreamTest> RODs,
                                             final List<RodBinding<Feature>> rodBindings) {
        for ( int i = 0; i < RODs.size(); i++ ) {
            final ReadMetaDataTrackerRODStreamTest test = RODs.get(i);
            final List<Feature> queryFeaturesList = tracker.getValues(rodBindings.get(i));
            final Set<Feature> queryFeatures = new HashSet<Feature>(queryFeaturesList);
            final Set<Feature> overlaps = test.getExpectedOverlaps(interval);

            Assert.assertEquals(queryFeatures.size(), overlaps.size(), "IntervalOverlappingRODsFromStream didn't return the expected set of overlapping features." +
                    " Expected size = " + overlaps.size() + " but saw " + queryFeatures.size());

            BaseTest.assertEqualsSet(queryFeatures, overlaps, "IntervalOverlappingRODsFromStream didn't return the expected set of overlapping features." +
                    " Expected = " + Utils.join(",", overlaps) + " but saw " + Utils.join(",", queryFeatures));
        }
    }

    static class TribbleIteratorFromCollection implements Iterator<RODRecordList> {
        // current location
        private final String name;
        final Queue<GATKFeature> gatkFeatures;

        public TribbleIteratorFromCollection(final String name, final GenomeLocParser genomeLocParser, final List<Feature> features) {
            this.name = name;

            this.gatkFeatures = new LinkedList<GATKFeature>();
            for ( final Feature f : features )
                gatkFeatures.add(new GATKFeature.TribbleGATKFeature(genomeLocParser, f, name));
        }

        @Override
        public boolean hasNext() {
            return ! gatkFeatures.isEmpty();
        }

        @Override
        public RODRecordList next() {
            final GATKFeature first = gatkFeatures.poll();
            final Collection<GATKFeature> myFeatures = new LinkedList<GATKFeature>();
            myFeatures.add(first);
            while ( gatkFeatures.peek() != null && gatkFeatures.peek().getLocation().getStart() == first.getStart() )
                myFeatures.add(gatkFeatures.poll());

            GenomeLoc loc = first.getLocation();
            for ( final GATKFeature feature : myFeatures )
                loc = loc.merge(feature.getLocation());

            return new RODRecordListImpl(name, myFeatures, loc); // is this safe?
        }

        @Override public void remove() { throw new IllegalStateException("GRRR"); }
    }
}

TOP

Related Classes of org.broadinstitute.gatk.engine.datasources.providers.IntervalReferenceOrderedViewUnitTest$CompareFeatures

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.