Package org.broadinstitute.gatk.utils

Source Code of org.broadinstitute.gatk.utils.GenomeLocParserUnitTest

/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package org.broadinstitute.gatk.utils;


import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.tribble.BasicFeature;
import htsjdk.tribble.Feature;
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.File;
import java.io.FileNotFoundException;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;

import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;

/**
* @author aaron
*         <p/>
*         Class GenomeLocParserUnitTest
*         <p/>
*         Test out the functionality of the new genome loc parser
*/
public class GenomeLocParserUnitTest extends BaseTest {
    private GenomeLocParser genomeLocParser;
    private SAMFileHeader header;

    @BeforeClass
         public void init() {
        header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
        genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
    }

    @Test(expectedExceptions=UserException.MalformedGenomeLoc.class)
    public void testGetContigIndex() {
        assertEquals(genomeLocParser.getContigIndex("blah"), -1); // should not be in the reference
    }               

    @Test
    public void testGetContigIndexValid() {
        assertEquals(genomeLocParser.getContigIndex("chr1"), 0); // should be in the reference
    }

    @Test(expectedExceptions=UserException.class)
    public void testGetContigInfoUnknownContig1() {
        assertEquals(null, genomeLocParser.getContigInfo("blah")); // should *not* be in the reference
    }

    @Test(expectedExceptions=UserException.class)
    public void testGetContigInfoUnknownContig2() {
        assertEquals(null, genomeLocParser.getContigInfo(null)); // should *not* be in the reference
    }

    @Test()
    public void testHasContigInfoUnknownContig1() {
        assertEquals(false, genomeLocParser.contigIsInDictionary("blah")); // should *not* be in the reference
    }

    @Test()
    public void testHasContigInfoUnknownContig2() {
        assertEquals(false, genomeLocParser.contigIsInDictionary(null)); // should *not* be in the reference
    }

    @Test()
    public void testHasContigInfoKnownContig() {
        assertEquals(true, genomeLocParser.contigIsInDictionary("chr1")); // should be in the reference
    }

    @Test
    public void testGetContigInfoKnownContig() {
        assertEquals(0, "chr1".compareTo(genomeLocParser.getContigInfo("chr1").getSequenceName())); // should be in the reference
    }

    @Test(expectedExceptions=ReviewedGATKException.class)
    public void testParseBadString() {
        genomeLocParser.parseGenomeLoc("Bad:0-1");
    }

    @Test
    public void testContigHasColon() {
        SAMFileHeader header = new SAMFileHeader();
        header.setSortOrder(htsjdk.samtools.SAMFileHeader.SortOrder.coordinate);
        SAMSequenceDictionary dict = new SAMSequenceDictionary();
        SAMSequenceRecord rec = new SAMSequenceRecord("c:h:r1", 10);
        rec.setSequenceLength(10);
        dict.addSequence(rec);
        header.setSequenceDictionary(dict);

        final GenomeLocParser myGenomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
        GenomeLoc loc = myGenomeLocParser.parseGenomeLoc("c:h:r1:4-5");
        assertEquals(0, loc.getContigIndex());
        assertEquals(loc.getStart(), 4);
        assertEquals(loc.getStop(), 5);
    }

    @Test
    public void testParseGoodString() {
        GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1-10");
        assertEquals(0, loc.getContigIndex());
        assertEquals(loc.getStop(), 10);
        assertEquals(loc.getStart(), 1);
    }

    @Test
    public void testCreateGenomeLoc1() {
        GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1, 100);
        assertEquals(0, loc.getContigIndex());
        assertEquals(loc.getStop(), 100);
        assertEquals(loc.getStart(), 1);
    }

    @Test
    public void testCreateGenomeLoc1point5() { // in honor of VAAL!
        GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1");
        assertEquals(0, loc.getContigIndex());
        assertEquals(loc.getStop(), 1);
        assertEquals(loc.getStart(), 1);
    }

    @Test
    public void testCreateGenomeLoc2() {
        GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1, 100);
        assertEquals("chr1", loc.getContig());
        assertEquals(loc.getStop(), 100);
        assertEquals(loc.getStart(), 1);
    }

    @Test
    public void testCreateGenomeLoc3() {
        GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1);
        assertEquals("chr1", loc.getContig());
        assertEquals(loc.getStop(), 1);
        assertEquals(loc.getStart(), 1);
    }

    @Test
    public void testCreateGenomeLoc4() {
        GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1);
        assertEquals(0, loc.getContigIndex());
        assertEquals(loc.getStop(), 1);
        assertEquals(loc.getStart(), 1);
    }

    @Test
    public void testCreateGenomeLoc5() {
        GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1, 100);
        GenomeLoc copy = genomeLocParser.createGenomeLoc(loc.getContig(),loc.getStart(),loc.getStop());
        assertEquals(0, copy.getContigIndex());
        assertEquals(copy.getStop(), 100);
        assertEquals(copy.getStart(), 1);
    }

    @Test
    public void testGenomeLocPlusSign() {
        GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1+");
        assertEquals(loc.getContigIndex(), 0);
        assertEquals(loc.getStop(), 10); // the size
        assertEquals(loc.getStart(), 1);
    }

    @Test
    public void testGenomeLocParseOnlyChrome() {
        GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1");
        assertEquals(loc.getContigIndex(), 0);
        assertEquals(loc.getStop(), 10); // the size
        assertEquals(loc.getStart(), 1);
    }

    @Test(expectedExceptions=ReviewedGATKException.class)
    public void testGenomeLocParseOnlyBadChrome() {
        GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr12");
        assertEquals(loc.getContigIndex(), 0);
        assertEquals(loc.getStop(), 10); // the size
        assertEquals(loc.getStart(), 1);
    }

    @Test(expectedExceptions=ReviewedGATKException.class)
    public void testGenomeLocBad() {
        GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1-");
        assertEquals(loc.getContigIndex(), 0);
        assertEquals(loc.getStop(), 10); // the size
        assertEquals(loc.getStart(), 1);
    }

    @Test(expectedExceptions=UserException.class)
    public void testGenomeLocBad2() {
        GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1-500-0");
        assertEquals(loc.getContigIndex(), 0);
        assertEquals(loc.getStop(), 10); // the size
        assertEquals(loc.getStart(), 1);
    }

    @Test(expectedExceptions=UserException.class)
    public void testGenomeLocBad3() {
        GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1--0");
        assertEquals(loc.getContigIndex(), 0);
        assertEquals(loc.getStop(), 10); // the size
        assertEquals(loc.getStart(), 1);
    }

    // test out the validating methods
    @Test
    public void testValidationOfGenomeLocs() {
        assertTrue(genomeLocParser.isValidGenomeLoc("chr1",1,1));
        assertTrue(!genomeLocParser.isValidGenomeLoc("chr2",1,1)); // shouldn't have an entry
        assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",1,11)); // past the end of the contig
        assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",-1,10)); // bad start
        assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",1,-2)); // bad stop
        assertTrue( genomeLocParser.isValidGenomeLoc("chr1",-1,2, false)); // bad stop
        assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",10,11)); // bad start, past end
        assertTrue( genomeLocParser.isValidGenomeLoc("chr1",10,11, false)); // bad start, past end
        assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",2,1)); // stop < start
    }

    @Test(expectedExceptions = ReviewedGATKException.class)
    public void testValidateGenomeLoc() {
        // bad contig index
        genomeLocParser.validateGenomeLoc("chr1", 1, 1, 2, false);
    }

    private static class FlankingGenomeLocTestData extends TestDataProvider {
        final GenomeLocParser parser;
        final int basePairs;
        final GenomeLoc original, flankStart, flankStop;

        private FlankingGenomeLocTestData(String name, GenomeLocParser parser, int basePairs, String original, String flankStart, String flankStop) {
            super(FlankingGenomeLocTestData.class, name);
            this.parser = parser;
            this.basePairs = basePairs;
            this.original = parse(parser, original);
            this.flankStart = flankStart == null ? null : parse(parser, flankStart);
            this.flankStop = flankStop == null ? null : parse(parser, flankStop);
        }

        private static GenomeLoc parse(GenomeLocParser parser, String str) {
            return "unmapped".equals(str) ? GenomeLoc.UNMAPPED : parser.parseGenomeLoc(str);
        }
    }

    @DataProvider(name = "flankingGenomeLocs")
    public Object[][] getFlankingGenomeLocs() {
        int contigLength = 10000;
        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, contigLength);
        GenomeLocParser parser = new GenomeLocParser(header.getSequenceDictionary());

        new FlankingGenomeLocTestData("atStartBase1", parser, 1,
                "chr1:1", null, "chr1:2");

        new FlankingGenomeLocTestData("atStartBase50", parser, 50,
                "chr1:1", null, "chr1:2-51");

        new FlankingGenomeLocTestData("atStartRange50", parser, 50,
                "chr1:1-10", null, "chr1:11-60");

        new FlankingGenomeLocTestData("atEndBase1", parser, 1,
                "chr1:" + contigLength, "chr1:" + (contigLength - 1), null);

        new FlankingGenomeLocTestData("atEndBase50", parser, 50,
                "chr1:" + contigLength, String.format("chr1:%d-%d", contigLength - 50, contigLength - 1), null);

        new FlankingGenomeLocTestData("atEndRange50", parser, 50,
                String.format("chr1:%d-%d", contigLength - 10, contigLength),
                String.format("chr1:%d-%d", contigLength - 60, contigLength - 11),
                null);

        new FlankingGenomeLocTestData("nearStartBase1", parser, 1,
                "chr1:2", "chr1:1", "chr1:3");

        new FlankingGenomeLocTestData("nearStartRange50", parser, 50,
                "chr1:21-30", "chr1:1-20", "chr1:31-80");

        new FlankingGenomeLocTestData("nearEndBase1", parser, 1,
                "chr1:" + (contigLength - 1), "chr1:" + (contigLength - 2), "chr1:" + contigLength);

        new FlankingGenomeLocTestData("nearEndRange50", parser, 50,
                String.format("chr1:%d-%d", contigLength - 30, contigLength - 21),
                String.format("chr1:%d-%d", contigLength - 80, contigLength - 31),
                String.format("chr1:%d-%d", contigLength - 20, contigLength));

        new FlankingGenomeLocTestData("beyondStartBase1", parser, 1,
                "chr1:3", "chr1:2", "chr1:4");

        new FlankingGenomeLocTestData("beyondStartRange50", parser, 50,
                "chr1:101-200", "chr1:51-100", "chr1:201-250");

        new FlankingGenomeLocTestData("beyondEndBase1", parser, 1,
                "chr1:" + (contigLength - 3),
                "chr1:" + (contigLength - 4),
                "chr1:" + (contigLength - 2));

        new FlankingGenomeLocTestData("beyondEndRange50", parser, 50,
                String.format("chr1:%d-%d", contigLength - 200, contigLength - 101),
                String.format("chr1:%d-%d", contigLength - 250, contigLength - 201),
                String.format("chr1:%d-%d", contigLength - 100, contigLength - 51));

        new FlankingGenomeLocTestData("unmapped", parser, 50,
                "unmapped", null, null);

        new FlankingGenomeLocTestData("fullContig", parser, 50,
                "chr1", null, null);

        return FlankingGenomeLocTestData.getTests(FlankingGenomeLocTestData.class);
    }

    @Test(dataProvider = "flankingGenomeLocs")
    public void testCreateGenomeLocAtStart(FlankingGenomeLocTestData data) {
        GenomeLoc actual = data.parser.createGenomeLocAtStart(data.original, data.basePairs);
        String description = String.format("%n      name: %s%n  original: %s%n    actual: %s%n  expected: %s%n",
                data.toString(), data.original, actual, data.flankStart);
        assertEquals(actual, data.flankStart, description);
    }

    @Test(dataProvider = "flankingGenomeLocs")
    public void testCreateGenomeLocAtStop(FlankingGenomeLocTestData data) {
        GenomeLoc actual = data.parser.createGenomeLocAtStop(data.original, data.basePairs);
        String description = String.format("%n      name: %s%n  original: %s%n    actual: %s%n  expected: %s%n",
                data.toString(), data.original, actual, data.flankStop);
        assertEquals(actual, data.flankStop, description);
    }

    @DataProvider(name = "parseGenomeLoc")
    public Object[][] makeParsingTest() {
        final List<Object[]> tests = new LinkedList<Object[]>();

        tests.add(new Object[]{ "chr1:10", "chr1", 10 });
        tests.add(new Object[]{ "chr1:100", "chr1", 100 });
        tests.add(new Object[]{ "chr1:1000", "chr1", 1000 });
        tests.add(new Object[]{ "chr1:1,000", "chr1", 1000 });
        tests.add(new Object[]{ "chr1:10000", "chr1", 10000 });
        tests.add(new Object[]{ "chr1:10,000", "chr1", 10000 });
        tests.add(new Object[]{ "chr1:100000", "chr1", 100000 });
        tests.add(new Object[]{ "chr1:100,000", "chr1", 100000 });
        tests.add(new Object[]{ "chr1:1000000", "chr1", 1000000 });
        tests.add(new Object[]{ "chr1:1,000,000", "chr1", 1000000 });
        tests.add(new Object[]{ "chr1:1000,000", "chr1", 1000000 });
        tests.add(new Object[]{ "chr1:1,000000", "chr1", 1000000 });

        return tests.toArray(new Object[][]{});
    }

    @Test( dataProvider = "parseGenomeLoc")
    public void testParsingPositions(final String string, final String contig, final int start) {
        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10000000);
        GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
        final GenomeLoc loc = genomeLocParser.parseGenomeLoc(string);
        Assert.assertEquals(loc.getContig(), contig);
        Assert.assertEquals(loc.getStart(), start);
        Assert.assertEquals(loc.getStop(), start);
    }

    @Test( )
    public void testCreationFromSAMRecord() {
        final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 5);
        final GenomeLoc loc = genomeLocParser.createGenomeLoc(read);
        Assert.assertEquals(loc.getContig(), read.getReferenceName());
        Assert.assertEquals(loc.getContigIndex(), (int)read.getReferenceIndex());
        Assert.assertEquals(loc.getStart(), read.getAlignmentStart());
        Assert.assertEquals(loc.getStop(), read.getAlignmentEnd());
    }

    @Test( )
    public void testCreationFromSAMRecordUnmapped() {
        final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 5);
        read.setReadUnmappedFlag(true);
        read.setReferenceIndex(-1);
        final GenomeLoc loc = genomeLocParser.createGenomeLoc(read);
        Assert.assertTrue(loc.isUnmapped());
    }

    @Test( )
    public void testCreationFromSAMRecordUnmappedButOnGenome() {
        final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 5);
        read.setReadUnmappedFlag(true);
        read.setCigarString("*");
        final GenomeLoc loc = genomeLocParser.createGenomeLoc(read);
        Assert.assertEquals(loc.getContig(), read.getReferenceName());
        Assert.assertEquals(loc.getContigIndex(), (int)read.getReferenceIndex());
        Assert.assertEquals(loc.getStart(), read.getAlignmentStart());
        Assert.assertEquals(loc.getStop(), read.getAlignmentStart());
    }

    @Test
    public void testCreationFromFeature() {
        final Feature feature = new BasicFeature("chr1", 1, 5);
        final GenomeLoc loc = genomeLocParser.createGenomeLoc(feature);
        Assert.assertEquals(loc.getContig(), feature.getChr());
        Assert.assertEquals(loc.getStart(), feature.getStart());
        Assert.assertEquals(loc.getStop(), feature.getEnd());
    }

    @Test
    public void testCreationFromVariantContext() {
        final VariantContext feature = new VariantContextBuilder("x", "chr1", 1, 5, Arrays.asList(Allele.create("AAAAA", true))).make();
        final GenomeLoc loc = genomeLocParser.createGenomeLoc(feature);
        Assert.assertEquals(loc.getContig(), feature.getChr());
        Assert.assertEquals(loc.getStart(), feature.getStart());
        Assert.assertEquals(loc.getStop(), feature.getEnd());
    }

    @Test
    public void testcreateGenomeLocOnContig() throws FileNotFoundException {
        final CachingIndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
        final SAMSequenceDictionary dict = seq.getSequenceDictionary();
        final GenomeLocParser genomeLocParser = new GenomeLocParser(dict);

        for ( final SAMSequenceRecord rec : dict.getSequences() ) {
            final GenomeLoc loc = genomeLocParser.createOverEntireContig(rec.getSequenceName());
            Assert.assertEquals(loc.getContig(), rec.getSequenceName());
            Assert.assertEquals(loc.getStart(), 1);
            Assert.assertEquals(loc.getStop(), rec.getSequenceLength());
        }
    }

    @DataProvider(name = "GenomeLocOnContig")
    public Object[][] makeGenomeLocOnContig() {
        final List<Object[]> tests = new LinkedList<Object[]>();

        final int contigLength = header.getSequence(0).getSequenceLength();
        for ( int start = -10; start < contigLength + 10; start++ ) {
            for ( final int len : Arrays.asList(1, 10, 20) ) {
                tests.add(new Object[]{ "chr1", start, start + len });
            }
        }

        return tests.toArray(new Object[][]{});
    }

    @Test( dataProvider = "GenomeLocOnContig")
    public void testGenomeLocOnContig(final String contig, final int start, final int stop) {
        final int contigLength = header.getSequence(0).getSequenceLength();
        final GenomeLoc loc = genomeLocParser.createGenomeLocOnContig(contig, start, stop);

        if ( stop < 1 || start > contigLength )
            Assert.assertNull(loc, "GenomeLoc should be null if the start/stops are not meaningful");
        else {
            Assert.assertNotNull(loc);
            Assert.assertEquals(loc.getContig(), contig);
            Assert.assertEquals(loc.getStart(), Math.max(start, 1));
            Assert.assertEquals(loc.getStop(), Math.min(stop, contigLength));
        }
    }

    @DataProvider(name = "GenomeLocPadding")
    public Object[][] makeGenomeLocPadding() {
        final List<Object[]> tests = new LinkedList<Object[]>();

        final int contigLength = header.getSequence(0).getSequenceLength();
        for ( int pad = 0; pad < contigLength + 1; pad++) {
            for ( int start = 1; start < contigLength; start++ ) {
                for ( int stop = start; stop < contigLength; stop++ ) {
                    tests.add(new Object[]{ genomeLocParser.createGenomeLoc("chr1", start, stop), pad});
                }
            }
        }

        return tests.toArray(new Object[][]{});
    }

    @Test( dataProvider = "GenomeLocPadding")
    public void testGenomeLocPadding(final GenomeLoc input, final int pad) {
        final int contigLength = header.getSequence(0).getSequenceLength();
        final GenomeLoc padded = genomeLocParser.createPaddedGenomeLoc(input, pad);

        Assert.assertNotNull(padded);
        Assert.assertEquals(padded.getContig(), input.getContig());
        Assert.assertEquals(padded.getStart(), Math.max(input.getStart() - pad, 1));
        Assert.assertEquals(padded.getStop(), Math.min(input.getStop() + pad, contigLength));
    }
}
TOP

Related Classes of org.broadinstitute.gatk.utils.GenomeLocParserUnitTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.