Package org.broadinstitute.gatk.utils.activeregion

Source Code of org.broadinstitute.gatk.utils.activeregion.ActiveRegionUnitTest

/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package org.broadinstitute.gatk.utils.activeregion;


// the imports for unit testing.


import htsjdk.samtools.reference.IndexedFastaSequenceFile;
import htsjdk.samtools.SAMFileHeader;
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.GenomeLocSortedSet;
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.File;
import java.io.FileNotFoundException;
import java.util.*;


public class ActiveRegionUnitTest extends BaseTest {
    private final static boolean DEBUG = false;
    private GenomeLocParser genomeLocParser;
    private IndexedFastaSequenceFile seq;
    private String contig;
    private int contigLength;

    @BeforeClass
    public void init() throws FileNotFoundException {
        // sequence
        seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
        genomeLocParser = new GenomeLocParser(seq);
        contig = "1";
        contigLength = genomeLocParser.getContigInfo(contig).getSequenceLength();
    }

    @DataProvider(name = "ActionRegionCreationTest")
    public Object[][] makePollingData() {
        List<Object[]> tests = new ArrayList<Object[]>();
        for ( final int start : Arrays.asList(1, 10, 100, contigLength - 10, contigLength - 1) ) {
            for ( final int size : Arrays.asList(1, 10, 100, 1000) ) {
                for ( final int ext : Arrays.asList(0, 1, 10, 100) ) {
                    for ( final boolean isActive : Arrays.asList(true, false) ) {
                        for ( final boolean addStates : Arrays.asList(true, false) ) {
                            List<ActivityProfileState> states = null;
                            if ( addStates ) {
                                states = new LinkedList<ActivityProfileState>();
                                for ( int i = start; i < start + size; i++ ) {
                                    states.add(new ActivityProfileState(genomeLocParser.createGenomeLoc(contig, i + start), isActive ? 1.0 : 0.0));
                                }
                            }
                            final GenomeLoc loc = genomeLocParser.createGenomeLoc(contig, start, start + size - 1);
                            tests.add(new Object[]{loc, states, isActive, ext});
                        }
                    }
                }
            }
        }

        return tests.toArray(new Object[][]{});
    }

    @Test(enabled = !DEBUG, dataProvider = "ActionRegionCreationTest")
    public void testCreatingActiveRegions(final GenomeLoc loc, final List<ActivityProfileState> supportingStates, final boolean isActive, final int extension) {
        final ActiveRegion region = new ActiveRegion(loc, supportingStates, isActive, genomeLocParser, extension);
        Assert.assertEquals(region.getLocation(), loc);
        Assert.assertEquals(region.getExtendedLoc().getStart(), Math.max(loc.getStart() - extension, 1));
        Assert.assertEquals(region.getExtendedLoc().getStop(), Math.min(loc.getStop() + extension, contigLength));
        Assert.assertEquals(region.getReadSpanLoc().getStart(), Math.max(loc.getStart() - extension, 1));
        Assert.assertEquals(region.getReadSpanLoc().getStop(), Math.min(loc.getStop() + extension, contigLength));
        Assert.assertEquals(region.isActive(), isActive);
        Assert.assertEquals(region.getExtension(), extension);
        Assert.assertEquals(region.getReads(), Collections.emptyList());
        Assert.assertEquals(region.size(), 0);
        Assert.assertEquals(region.getSupportingStates(), supportingStates == null ? Collections.emptyList() : supportingStates);
        Assert.assertNotNull(region.toString());

        assertGoodReferenceGetter(region.getActiveRegionReference(seq), region.getExtendedLoc(), 0);
        assertGoodReferenceGetter(region.getActiveRegionReference(seq, 10), region.getExtendedLoc(), 10);
        assertGoodReferenceGetter(region.getFullReference(seq), region.getReadSpanLoc(), 0);
        assertGoodReferenceGetter(region.getFullReference(seq, 10), region.getReadSpanLoc(), 10);
    }

    private void assertGoodReferenceGetter(final byte[] actualBytes, final GenomeLoc span, final int padding) {
        final int expectedStart = Math.max(span.getStart() - padding, 1);
        final int expectedStop = Math.min(span.getStop() + padding, contigLength);
        final byte[] expectedBytes = seq.getSubsequenceAt(span.getContig(), expectedStart, expectedStop).getBases();
        Assert.assertEquals(actualBytes, expectedBytes);
    }

    @DataProvider(name = "ActiveRegionReads")
    public Object[][] makeActiveRegionReads() {
        List<Object[]> tests = new ArrayList<Object[]>();
        final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(seq.getSequenceDictionary());
        for ( final int start : Arrays.asList(1, 10, 100, contigLength - 10, contigLength - 1) ) {
            for ( final int readStartOffset : Arrays.asList(-100, -10, 0, 10, 100) ) {
                for ( final int readSize : Arrays.asList(10, 100, 1000) ) {
                    final GenomeLoc loc = genomeLocParser.createGenomeLocOnContig(contig, start, start + 10);

                    final int readStart = Math.max(start + readStartOffset, 1);
                    final int readStop = Math.min(readStart + readSize, contigLength);
                    final int readLength = readStop - readStart + 1;
                    if ( readLength > 0 ) {
                        GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, readStart, readLength);
                        final GenomeLoc readLoc = genomeLocParser.createGenomeLoc(read);
                        if ( readLoc.overlapsP(loc) )
                            tests.add(new Object[]{loc, read});
                    }
                }
            }
        }

        return tests.toArray(new Object[][]{});
    }

    @Test(enabled = !DEBUG, dataProvider = "ActiveRegionReads")
    public void testActiveRegionReads(final GenomeLoc loc, final GATKSAMRecord read) throws Exception {
        final GenomeLoc expectedSpan = loc.union(genomeLocParser.createGenomeLoc(read));

        final ActiveRegion region = new ActiveRegion(loc, null, true, genomeLocParser, 0);
        final ActiveRegion region2 = new ActiveRegion(loc, null, true, genomeLocParser, 0);
        Assert.assertEquals(region.getReads(), Collections.emptyList());
        Assert.assertEquals(region.size(), 0);
        Assert.assertEquals(region.getExtendedLoc(), loc);
        Assert.assertEquals(region.getReadSpanLoc(), loc);
        Assert.assertTrue(region.equalExceptReads(region2));

        region.add(read);
        Assert.assertEquals(region.getReads(), Collections.singletonList(read));
        Assert.assertEquals(region.size(), 1);
        Assert.assertEquals(region.getExtendedLoc(), loc);
        Assert.assertEquals(region.getReadSpanLoc(), expectedSpan);
        Assert.assertTrue(region.equalExceptReads(region2));

        region.clearReads();
        Assert.assertEquals(region.getReads(), Collections.emptyList());
        Assert.assertEquals(region.size(), 0);
        Assert.assertEquals(region.getExtendedLoc(), loc);
        Assert.assertEquals(region.getReadSpanLoc(), loc);
        Assert.assertTrue(region.equalExceptReads(region2));

        region.addAll(Collections.singleton(read));
        Assert.assertEquals(region.getReads(), Collections.singletonList(read));
        Assert.assertEquals(region.size(), 1);
        Assert.assertEquals(region.getExtendedLoc(), loc);
        Assert.assertEquals(region.getReadSpanLoc(), expectedSpan);
        Assert.assertTrue(region.equalExceptReads(region2));

        region.removeAll(Collections.<GATKSAMRecord>emptySet());
        Assert.assertEquals(region.getReads(), Collections.singletonList(read));
        Assert.assertEquals(region.size(), 1);
        Assert.assertEquals(region.getExtendedLoc(), loc);
        Assert.assertEquals(region.getReadSpanLoc(), expectedSpan);
        Assert.assertTrue(region.equalExceptReads(region2));

        region.removeAll(Collections.singleton(read));
        Assert.assertEquals(region.getReads(), Collections.emptyList());
        Assert.assertEquals(region.size(), 0);
        Assert.assertEquals(region.getExtendedLoc(), loc);
        Assert.assertEquals(region.getReadSpanLoc(), loc);
        Assert.assertTrue(region.equalExceptReads(region2));

        final GATKSAMRecord read2 = (GATKSAMRecord)read.clone();
        read2.setReadName(read.getReadName() + ".clone");

        for ( final GATKSAMRecord readToKeep : Arrays.asList(read, read2)) {
            region.addAll(Arrays.asList(read, read2));
            final GATKSAMRecord readToDiscard = readToKeep == read ? read2 : read;
            region.removeAll(Collections.singleton(readToDiscard));
            Assert.assertEquals(region.getReads(), Arrays.asList(readToKeep));
            Assert.assertEquals(region.size(), 1);
            Assert.assertEquals(region.getExtendedLoc(), loc);
        }
    }

    // -----------------------------------------------------------------------------------------------
    //
    // Make sure bad inputs are properly detected
    //
    // -----------------------------------------------------------------------------------------------

    @DataProvider(name = "BadReadsTest")
    public Object[][] makeBadReadsTest() {
        List<Object[]> tests = new ArrayList<Object[]>();
        final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(seq.getSequenceDictionary());
        tests.add(new Object[]{
                ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 10, 10),
                ArtificialSAMUtils.createArtificialRead(header, "read2", 0, 9, 10)});
        tests.add(new Object[]{
                ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 10, 10),
                ArtificialSAMUtils.createArtificialRead(header, "read2", 1, 9, 10)});
        tests.add(new Object[]{
                ArtificialSAMUtils.createArtificialRead(header, "read1", 1, 10, 10),
                ArtificialSAMUtils.createArtificialRead(header, "read2", 0, 9, 10)});
        return tests.toArray(new Object[][]{});
    }

    @Test(enabled = !DEBUG, dataProvider = "BadReadsTest", expectedExceptions = IllegalArgumentException.class)
    public void testBadReads(final GATKSAMRecord read1, final GATKSAMRecord read2) {
        final GenomeLoc loc = genomeLocParser.createGenomeLoc(read1);
        final ActiveRegion region = new ActiveRegion(loc, null, true, genomeLocParser, 0);
        region.add(read1);
        region.add(read2);
    }

    // -----------------------------------------------------------------------------------------------
    //
    // Make sure we can properly cut up an active region based on engine intervals
    //
    // -----------------------------------------------------------------------------------------------

    @DataProvider(name = "SplitActiveRegion")
    public Object[][] makeSplitActiveRegion() {
        List<Object[]> tests = new ArrayList<Object[]>();

        final GenomeLoc whole_span = genomeLocParser.createGenomeLoc("20", 1, 500);
        final GenomeLoc gl_before = genomeLocParser.createGenomeLoc("20", 1, 9);
        final GenomeLoc gl_after = genomeLocParser.createGenomeLoc("20", 250, 500);
        final GenomeLoc gl_diff_contig = genomeLocParser.createGenomeLoc("19", 40, 50);

        final int regionStart = 10;
        final int regionStop = 100;
        final GenomeLoc region = genomeLocParser.createGenomeLoc("20", regionStart, regionStop);

        for ( final GenomeLoc noEffect : Arrays.asList(whole_span) )
            tests.add(new Object[]{
                    region,
                    Arrays.asList(noEffect),
                    Arrays.asList(region)});

        for ( final GenomeLoc noOverlap : Arrays.asList(gl_before, gl_after, gl_diff_contig) )
            tests.add(new Object[]{
                    region,
                    Arrays.asList(noOverlap),
                    Arrays.asList()});

        tests.add(new Object[]{region,
                Arrays.asList(genomeLocParser.createGenomeLoc("20", 5, 50)),
                Arrays.asList(genomeLocParser.createGenomeLoc("20", regionStart, 50))});

        tests.add(new Object[]{region,
                Arrays.asList(genomeLocParser.createGenomeLoc("20", 50, 200)),
                Arrays.asList(genomeLocParser.createGenomeLoc("20", 50, regionStop))});

        tests.add(new Object[]{region,
                Arrays.asList(genomeLocParser.createGenomeLoc("20", 40, 50)),
                Arrays.asList(genomeLocParser.createGenomeLoc("20", 40, 50))});

        tests.add(new Object[]{region,
                Arrays.asList(genomeLocParser.createGenomeLoc("20", 20, 30), genomeLocParser.createGenomeLoc("20", 40, 50)),
                Arrays.asList(genomeLocParser.createGenomeLoc("20", 20, 30), genomeLocParser.createGenomeLoc("20", 40, 50))});

        tests.add(new Object[]{region,
                Arrays.asList(genomeLocParser.createGenomeLoc("20", 1, 30), genomeLocParser.createGenomeLoc("20", 40, 50)),
                Arrays.asList(genomeLocParser.createGenomeLoc("20", regionStart, 30), genomeLocParser.createGenomeLoc("20", 40, 50))});

        tests.add(new Object[]{region,
                Arrays.asList(genomeLocParser.createGenomeLoc("20", 1, 30), genomeLocParser.createGenomeLoc("20", 70, 200)),
                Arrays.asList(genomeLocParser.createGenomeLoc("20", regionStart, 30), genomeLocParser.createGenomeLoc("20", 70, regionStop))});

        tests.add(new Object[]{region,
                Arrays.asList(genomeLocParser.createGenomeLoc("20", 1, 30), genomeLocParser.createGenomeLoc("20", 40, 50), genomeLocParser.createGenomeLoc("20", 70, 200)),
                Arrays.asList(genomeLocParser.createGenomeLoc("20", regionStart, 30), genomeLocParser.createGenomeLoc("20", 40, 50), genomeLocParser.createGenomeLoc("20", 70, regionStop))});

        return tests.toArray(new Object[][]{});
    }

    @Test(dataProvider = "SplitActiveRegion")
    public void testSplitActiveRegion(final GenomeLoc regionLoc, final List<GenomeLoc> intervalLocs, final List<GenomeLoc> expectedRegionLocs) {
        for ( final boolean addSubstates : Arrays.asList(true, false) ) {
            final List<ActivityProfileState> states;
            if ( addSubstates ) {
                states = new LinkedList<ActivityProfileState>();
                for ( int i = 0; i < regionLoc.size(); i++ )
                    states.add(new ActivityProfileState(genomeLocParser.createGenomeLoc(regionLoc.getContig(), regionLoc.getStart() + i), 1.0));
            } else {
                states = null;
            }

            final ActiveRegion region = new ActiveRegion(regionLoc, states, true, genomeLocParser, 0);
            final GenomeLocSortedSet intervals = new GenomeLocSortedSet(genomeLocParser,  intervalLocs);
            final List<ActiveRegion> regions = region.splitAndTrimToIntervals(intervals);

            Assert.assertEquals(regions.size(), expectedRegionLocs.size(), "Wrong number of split locations");
            for ( int i = 0; i < expectedRegionLocs.size(); i++ ) {
                final GenomeLoc expected = expectedRegionLocs.get(i);
                final ActiveRegion actual = regions.get(i);
                Assert.assertEquals(actual.getLocation(), expected, "Bad region after split");
                Assert.assertEquals(actual.isActive(), region.isActive());
                Assert.assertEquals(actual.getExtension(), region.getExtension());
            }
        }
    }

    // -----------------------------------------------------------------------------------------------
    //
    // Make sure we can properly cut up an active region based on engine intervals
    //
    // -----------------------------------------------------------------------------------------------

    @DataProvider(name = "TrimActiveRegionData")
    public Object[][] makeTrimActiveRegionData() {
        List<Object[]> tests = new ArrayList<Object[]>();

        // fully enclosed within active region
        tests.add(new Object[]{
                genomeLocParser.createGenomeLoc("20", 10, 20), 10,
                genomeLocParser.createGenomeLoc("20", 15, 16),
                genomeLocParser.createGenomeLoc("20", 15, 16), 0});

        tests.add(new Object[]{
                genomeLocParser.createGenomeLoc("20", 10, 20), 10,
                genomeLocParser.createGenomeLoc("20", 10, 15),
                genomeLocParser.createGenomeLoc("20", 10, 15), 0});

        tests.add(new Object[]{
                genomeLocParser.createGenomeLoc("20", 10, 20), 10,
                genomeLocParser.createGenomeLoc("20", 15, 20),
                genomeLocParser.createGenomeLoc("20", 15, 20), 0});

        // needs extra padding on the right
        tests.add(new Object[]{
                genomeLocParser.createGenomeLoc("20", 10, 20), 10,
                genomeLocParser.createGenomeLoc("20", 15, 25),
                genomeLocParser.createGenomeLoc("20", 15, 20), 5});

        // needs extra padding on the left
        tests.add(new Object[]{
                genomeLocParser.createGenomeLoc("20", 10, 20), 10,
                genomeLocParser.createGenomeLoc("20", 5, 15),
                genomeLocParser.createGenomeLoc("20", 10, 15), 5});

        // needs extra padding on both
        tests.add(new Object[]{
                genomeLocParser.createGenomeLoc("20", 10, 20), 10,
                genomeLocParser.createGenomeLoc("20", 7, 21),
                genomeLocParser.createGenomeLoc("20", 10, 20), 3});
        tests.add(new Object[]{
                genomeLocParser.createGenomeLoc("20", 10, 20), 10,
                genomeLocParser.createGenomeLoc("20", 9, 23),
                genomeLocParser.createGenomeLoc("20", 10, 20), 3});

        // desired span captures everything, so we're returning everything.  Tests that extension is set correctly
        tests.add(new Object[]{
                genomeLocParser.createGenomeLoc("20", 10, 20), 10,
                genomeLocParser.createGenomeLoc("20", 1, 50),
                genomeLocParser.createGenomeLoc("20", 10, 20), 10});

        // At the start of the chromosome, potentially a bit weird
        tests.add(new Object[]{
                genomeLocParser.createGenomeLoc("20", 1, 10), 10,
                genomeLocParser.createGenomeLoc("20", 1, 50),
                genomeLocParser.createGenomeLoc("20", 1, 10), 10});

        return tests.toArray(new Object[][]{});
    }

    @Test(dataProvider = "TrimActiveRegionData")
    public void testTrimActiveRegion(final GenomeLoc regionLoc, final int extension, final GenomeLoc desiredSpan, final GenomeLoc expectedActiveRegion, final int expectedExtension) {
        final ActiveRegion region = new ActiveRegion(regionLoc, Collections.<ActivityProfileState>emptyList(), true, genomeLocParser, extension);
        final ActiveRegion trimmed = region.trim(desiredSpan);
        Assert.assertEquals(trimmed.getLocation(), expectedActiveRegion, "Incorrect region");
        Assert.assertEquals(trimmed.getExtension(), expectedExtension, "Incorrect region");
    }
}
TOP

Related Classes of org.broadinstitute.gatk.utils.activeregion.ActiveRegionUnitTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.