Package org.broadinstitute.gatk.engine

Source Code of org.broadinstitute.gatk.engine.EngineFeaturesIntegrationTest$EngineErrorHandlingTestProvider

/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package org.broadinstitute.gatk.engine;

import htsjdk.samtools.*;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.tribble.readers.LineIterator;
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.engine.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
import org.broadinstitute.gatk.engine.datasources.reference.ReferenceDataSource;
import org.broadinstitute.gatk.engine.filters.MappingQualityUnavailableFilter;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.engine.refdata.tracks.RMDTrack;
import org.broadinstitute.gatk.engine.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.gatk.engine.refdata.utils.GATKFeature;
import org.broadinstitute.gatk.engine.walkers.ReadFilters;
import org.broadinstitute.gatk.engine.walkers.ReadWalker;
import org.broadinstitute.gatk.engine.walkers.RodWalker;
import org.broadinstitute.gatk.tools.walkers.qc.ErrorThrowing;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import org.broadinstitute.gatk.utils.sam.GATKSamRecordFactory;
import org.broadinstitute.gatk.utils.variant.GATKVCFUtils;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFCodec;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.*;
import java.util.*;

/**
*
*/
public class EngineFeaturesIntegrationTest extends WalkerTest {
    private void testBadRODBindingInput(String type, String name, Class c) {
        WalkerTestSpec spec = new WalkerTestSpec("-T SelectVariants -L 1:1 --variant:variant," + type + " "
                + b37dbSNP132 + " -R " + b37KGReference + " -o %s",
                1, c);
        executeTest(name, spec);
    }

    @Test() private void testBadRODBindingInputType1() {
        testBadRODBindingInput("beagle", "BEAGLE input to VCF expecting walker", UserException.BadArgumentValue.class);
    }

    @Test() private void testBadRODBindingInputType3() {
        testBadRODBindingInput("bed", "Bed input to VCF expecting walker", UserException.BadArgumentValue.class);
    }

    @Test() private void testBadRODBindingInputTypeUnknownType() {
        testBadRODBindingInput("bedXXX", "Unknown input to VCF expecting walker", UserException.UnknownTribbleType.class);
    }

    private void testMissingFile(String name, String missingBinding) {
        WalkerTestSpec spec = new WalkerTestSpec(missingBinding + " -R " + b37KGReference + " -o %s",
                1, UserException.CouldNotReadInputFile.class);
        executeTest(name, spec);
    }

    @Test() private void testMissingBAMnt1() {
        testMissingFile("missing BAM", "-T PrintReads -I missing.bam -nt 1");
    }
    @Test() private void testMissingBAMnt4() {
        testMissingFile("missing BAM", "-T PrintReads -I missing.bam -nt 4");
    }
    @Test() private void testMissingVCF() {
        testMissingFile("missing VCF", "-T SelectVariants -V missing.vcf");
    }
    @Test() private void testMissingInterval() {
        testMissingFile("missing interval", "-T PrintReads -L missing.interval_list -I " + b37GoodBAM);
    }


    // --------------------------------------------------------------------------------
    //
    // Test that our exceptions are coming back as we expect
    //
    // --------------------------------------------------------------------------------

    private class EngineErrorHandlingTestProvider extends TestDataProvider {
        final Class expectedException;
        final String args;
        final int iterationsToTest;

        public EngineErrorHandlingTestProvider(Class exceptedException, final String args) {
            super(EngineErrorHandlingTestProvider.class);
            this.expectedException = exceptedException;
            this.args = args;
            this.iterationsToTest = args.equals("") ? 1 : 10;
            setName(String.format("Engine error handling: expected %s with args %s", exceptedException, args));
        }
    }

    @DataProvider(name = "EngineErrorHandlingTestProvider")
    public Object[][] makeEngineErrorHandlingTestProvider() {
        for ( final ErrorThrowing.FailMethod failMethod : ErrorThrowing.FailMethod.values() ) {
            if ( failMethod == ErrorThrowing.FailMethod.TREE_REDUCE )
                continue; // cannot reliably throw errors in TREE_REDUCE

            final String failArg = " -fail " + failMethod.name();
            for ( final String args : Arrays.asList("", " -nt 2", " -nct 2") ) {
                new EngineErrorHandlingTestProvider(NullPointerException.class, failArg + args);
                new EngineErrorHandlingTestProvider(UserException.class, failArg + args);
                new EngineErrorHandlingTestProvider(ReviewedGATKException.class, failArg + args);
            }
        }

        return EngineErrorHandlingTestProvider.getTests(EngineErrorHandlingTestProvider.class);
    }

    //
    // Loop over errors to throw, make sure they are the errors we get back from the engine, regardless of NT type
    //
    @Test(enabled = true, dataProvider = "EngineErrorHandlingTestProvider", timeOut = 60 * 1000 )
    public void testEngineErrorHandlingTestProvider(final EngineErrorHandlingTestProvider cfg) {
        for ( int i = 0; i < cfg.iterationsToTest; i++ ) {
            final String root = "-T ErrorThrowing -R " + exampleFASTA;
            final String args = root + cfg.args + " -E " + cfg.expectedException.getSimpleName();
            WalkerTestSpec spec = new WalkerTestSpec(args, 0, cfg.expectedException);

            executeTest(cfg.toString(), spec);
        }
    }

    // --------------------------------------------------------------------------------
    //
    // Test that read filters are being applied in the order we expect
    //
    // --------------------------------------------------------------------------------

    @ReadFilters({MappingQualityUnavailableFilter.class})
    public static class DummyReadWalkerWithMapqUnavailableFilter extends ReadWalker<Integer, Integer> {
        @Output
        PrintStream out;

        @Override
        public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
            return 1;
        }

        @Override
        public Integer reduceInit() {
            return 0;
        }

        @Override
        public Integer reduce(Integer value, Integer sum) {
            return value + sum;
        }

        @Override
        public void onTraversalDone(Integer result) {
            out.println(result);
        }
    }

    @Test(enabled = true)
    public void testUserReadFilterAppliedBeforeWalker() {
        WalkerTestSpec spec = new WalkerTestSpec("-R " + b37KGReference + " -I " + privateTestDir + "allMAPQ255.bam"
                + " -T DummyReadWalkerWithMapqUnavailableFilter -o %s -L MT -rf ReassignMappingQuality",
                1, Arrays.asList("ecf27a776cdfc771defab1c5d19de9ab"));
        executeTest("testUserReadFilterAppliedBeforeWalker", spec);
    }

    @Test
    public void testNegativeCompress() {
        testBadCompressArgument(-1);
    }

    @Test
    public void testTooBigCompress() {
        testBadCompressArgument(100);
    }

    private void testBadCompressArgument(final int compress) {
        WalkerTestSpec spec = new WalkerTestSpec("-T PrintReads -R " + b37KGReference + " -I " + privateTestDir + "NA12878.1_10mb_2_10mb.bam -o %s -compress " + compress,
                1, UserException.class);
        executeTest("badCompress " + compress, spec);
    }

    // --------------------------------------------------------------------------------
    //
    // Test that the VCF version key is what we expect
    //
    // --------------------------------------------------------------------------------
    @Test(enabled = true)
    public void testGATKVersionInVCF() throws Exception {
        WalkerTestSpec spec = new WalkerTestSpec("-T SelectVariants -R " + b37KGReference +
                " -V " + privateTestDir + "NA12878.WGS.b37.chr20.firstMB.vcf"
                + " -o %s -L 20:61098",
                1, Arrays.asList(""));
        spec.disableShadowBCF();
        final File vcf = executeTest("testGATKVersionInVCF", spec).first.get(0);
        final VCFCodec codec = new VCFCodec();
        final VCFHeader header = (VCFHeader) codec.readActualHeader(codec.makeSourceFromStream(new FileInputStream(vcf)));
        final VCFHeaderLine versionLine = header.getMetaDataLine(GATKVCFUtils.GATK_COMMAND_LINE_KEY);
        Assert.assertNotNull(versionLine);
        Assert.assertTrue(versionLine.toString().contains("SelectVariants"));
    }

    @Test(enabled = true)
    public void testMultipleGATKVersionsInVCF() throws Exception {
        WalkerTestSpec spec = new WalkerTestSpec("-T SelectVariants -R " + b37KGReference +
                " -V " + privateTestDir + "gatkCommandLineInHeader.vcf"
                + " -o %s",
                1, Arrays.asList(""));
        spec.disableShadowBCF();
        final File vcf = executeTest("testMultipleGATKVersionsInVCF", spec).first.get(0);
        final VCFCodec codec = new VCFCodec();
        final VCFHeader header = (VCFHeader) codec.readActualHeader(codec.makeSourceFromStream(new FileInputStream(vcf)));

        boolean foundHC = false;
        boolean foundSV = false;
        for ( final VCFHeaderLine line : header.getMetaDataInInputOrder() ) {
            if ( line.getKey().equals(GATKVCFUtils.GATK_COMMAND_LINE_KEY) ) {
                if ( line.toString().contains("HaplotypeCaller") ) {
                    Assert.assertFalse(foundHC);
                    foundHC = true;
                }
                if ( line.toString().contains("SelectVariants") ) {
                    Assert.assertFalse(foundSV);
                    foundSV = true;
                }
            }
        }

        Assert.assertTrue(foundHC, "Didn't find HaplotypeCaller command line header field");
        Assert.assertTrue(foundSV, "Didn't find SelectVariants command line header field");
    }

    // --------------------------------------------------------------------------------
    //
    // Test that defaultBaseQualities actually works
    //
    // --------------------------------------------------------------------------------

    public WalkerTestSpec testDefaultBaseQualities(final Integer value, final String md5) {
        return new WalkerTestSpec("-T PrintReads -R " + b37KGReference + " -I " + privateTestDir + "/baseQualitiesToFix.bam -o %s"
                + (value != null ? " --defaultBaseQualities " + value : ""),
                1, Arrays.asList(md5));
    }

    @Test()
    public void testDefaultBaseQualities20() {
        executeTest("testDefaultBaseQualities20", testDefaultBaseQualities(20, "7d254a9d0ec59c66ee3e137f56f4c78f"));
    }

    @Test()
    public void testDefaultBaseQualities30() {
        executeTest("testDefaultBaseQualities30", testDefaultBaseQualities(30, "0f50def6cbbbd8ccd4739e2b3998e503"));
    }

    @Test(expectedExceptions = Exception.class)
    public void testDefaultBaseQualitiesNoneProvided() {
        executeTest("testDefaultBaseQualitiesNoneProvided", testDefaultBaseQualities(null, ""));
    }

    // --------------------------------------------------------------------------------
    //
    // Test engine-level cigar consolidation
    //
    // --------------------------------------------------------------------------------

    @Test
    public void testGATKEngineConsolidatesCigars() {
        final WalkerTestSpec spec = new WalkerTestSpec(" -T PrintReads" +
                                                       " -R " + b37KGReference +
                                                       " -I " + privateTestDir + "zero_length_cigar_elements.bam" +
                                                       " -o %s",
                                                       1, Arrays.asList(""))// No MD5s; we only want to check the cigar

        final File outputBam = executeTest("testGATKEngineConsolidatesCigars", spec).first.get(0);
        final SAMFileReader reader = new SAMFileReader(outputBam);
        reader.setValidationStringency(ValidationStringency.SILENT);
        reader.setSAMRecordFactory(new GATKSamRecordFactory());

        final SAMRecord read = reader.iterator().next();
        reader.close();

        // Original cigar was 0M3M0M8M. Check that it's been consolidated after running through the GATK engine:
        Assert.assertEquals(read.getCigarString(), "11M", "Cigar 0M3M0M8M not consolidated correctly by the engine");
    }

    // --------------------------------------------------------------------------------
    //
    // Test on-the-fly sample renaming
    //
    // --------------------------------------------------------------------------------

    // On-the-fly sample renaming test case: one single-sample bam with multiple read groups
    @Test
    public void testOnTheFlySampleRenamingWithSingleBamFile() throws IOException {
        final File sampleRenameMapFile = createTestSampleRenameMapFile(
                Arrays.asList(privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.HEADERONLY.bam  myNewSampleName"));

        final WalkerTestSpec spec = new WalkerTestSpec(" -T PrintReads" +
                                                       " -R " + b37KGReference +
                                                       " -I " + privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.HEADERONLY.bam" +
                                                       " --sample_rename_mapping_file " + sampleRenameMapFile.getAbsolutePath() +
                                                       " -o %s",
                                                       1, Arrays.asList(""))// No MD5s; we only want to check the read groups

        final File outputBam = executeTest("testOnTheFlySampleRenamingWithSingleBamFile", spec).first.get(0);
        final SAMFileReader reader = new SAMFileReader(outputBam);

        for ( final SAMReadGroupRecord readGroup : reader.getFileHeader().getReadGroups() ) {
            Assert.assertEquals(readGroup.getSample(), "myNewSampleName", String.format("Sample for read group %s not renamed correctly", readGroup.getId()));
        }

        reader.close();
    }

    // On-the-fly sample renaming test case: three single-sample bams with multiple read groups per bam
    @Test
    public void testOnTheFlySampleRenamingWithMultipleBamFiles() throws IOException {
        final File sampleRenameMapFile = createTestSampleRenameMapFile(
                Arrays.asList(privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.HEADERONLY.bam  newSampleFor12878",
                              privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12891.HEADERONLY.bam  newSampleFor12891",
                              privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12892.HEADERONLY.bam  newSampleFor12892"));

        final Map<String, String> readGroupToNewSampleMap = new HashMap<>();
        for ( String inputBamID : Arrays.asList("12878", "12891", "12892") ) {
            final File inputBam = new File(privateTestDir + String.format("CEUTrio.HiSeq.WGS.b37.NA%s.HEADERONLY.bam", inputBamID));
            final SAMFileReader inputBamReader = new SAMFileReader(inputBam);
            final String newSampleName = String.format("newSampleFor%s", inputBamID);
            for ( final SAMReadGroupRecord readGroup : inputBamReader.getFileHeader().getReadGroups() ) {
                readGroupToNewSampleMap.put(readGroup.getId(), newSampleName);
            }
            inputBamReader.close();
        }

        final WalkerTestSpec spec = new WalkerTestSpec(" -T PrintReads" +
                                                       " -R " + b37KGReference +
                                                       " -I " + privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.HEADERONLY.bam" +
                                                       " -I " + privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12891.HEADERONLY.bam" +
                                                       " -I " + privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12892.HEADERONLY.bam" +
                                                       " --sample_rename_mapping_file " + sampleRenameMapFile.getAbsolutePath() +
                                                       " -o %s",
                                                       1, Arrays.asList(""))// No MD5s; we only want to check the read groups

        final File outputBam = executeTest("testOnTheFlySampleRenamingWithMultipleBamFiles", spec).first.get(0);
        final SAMFileReader outputBamReader = new SAMFileReader(outputBam);

        int totalReadGroupsSeen = 0;
        for ( final SAMReadGroupRecord readGroup : outputBamReader.getFileHeader().getReadGroups() ) {
            Assert.assertEquals(readGroup.getSample(), readGroupToNewSampleMap.get(readGroup.getId()),
                                String.format("Wrong sample for read group %s after on-the-fly renaming", readGroup.getId()));
            totalReadGroupsSeen++;
        }

        Assert.assertEquals(totalReadGroupsSeen, readGroupToNewSampleMap.size(), "Wrong number of read groups encountered in output bam file");

        outputBamReader.close();
    }

    // On-the-fly sample renaming test case: three single-sample bams with multiple read groups per bam,
    //                                       performing renaming in only SOME of the bams
    @Test
    public void testOnTheFlySampleRenamingWithMultipleBamFilesPartialRename() throws IOException {
        // Rename samples for NA12878 and NA12892, but not for NA12891
        final File sampleRenameMapFile = createTestSampleRenameMapFile(
                Arrays.asList(privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.HEADERONLY.bam  newSampleFor12878",
                              privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12892.HEADERONLY.bam  newSampleFor12892"));

        final Map<String, String> readGroupToNewSampleMap = new HashMap<>();
        for ( String inputBamID : Arrays.asList("12878", "12891", "12892") ) {
            final File inputBam = new File(privateTestDir + String.format("CEUTrio.HiSeq.WGS.b37.NA%s.HEADERONLY.bam", inputBamID));
            final SAMFileReader inputBamReader = new SAMFileReader(inputBam);

            // Special-case NA12891, which we're not renaming:
            final String newSampleName = inputBamID.equals("12891") ? "NA12891" : String.format("newSampleFor%s", inputBamID);

            for ( final SAMReadGroupRecord readGroup : inputBamReader.getFileHeader().getReadGroups() ) {
                readGroupToNewSampleMap.put(readGroup.getId(), newSampleName);
            }
            inputBamReader.close();
        }

        final WalkerTestSpec spec = new WalkerTestSpec(" -T PrintReads" +
                                                       " -R " + b37KGReference +
                                                       " -I " + privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.HEADERONLY.bam" +
                                                       " -I " + privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12891.HEADERONLY.bam" +
                                                       " -I " + privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12892.HEADERONLY.bam" +
                                                       " --sample_rename_mapping_file " + sampleRenameMapFile.getAbsolutePath() +
                                                       " -o %s",
                                                       1, Arrays.asList(""))// No MD5s; we only want to check the read groups

        final File outputBam = executeTest("testOnTheFlySampleRenamingWithMultipleBamFilesPartialRename", spec).first.get(0);
        final SAMFileReader outputBamReader = new SAMFileReader(outputBam);

        int totalReadGroupsSeen = 0;
        for ( final SAMReadGroupRecord readGroup : outputBamReader.getFileHeader().getReadGroups() ) {
            Assert.assertEquals(readGroup.getSample(), readGroupToNewSampleMap.get(readGroup.getId()),
                                String.format("Wrong sample for read group %s after on-the-fly renaming", readGroup.getId()));
            totalReadGroupsSeen++;
        }

        Assert.assertEquals(totalReadGroupsSeen, readGroupToNewSampleMap.size(), "Wrong number of read groups encountered in output bam file");

        outputBamReader.close();
    }

    // On-the-fly sample renaming test case: two single-sample bams with read group collisions
    @Test
    public void testOnTheFlySampleRenamingWithReadGroupCollisions() throws IOException {
        final File sampleRenameMapFile = createTestSampleRenameMapFile(
                Arrays.asList(privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.HEADERONLY.bam  newSampleFor12878",
                              privateTestDir + "CEUTrio.HiSeq.WGS.b37.READ_GROUP_COLLISIONS_WITH_NA12878.HEADERONLY.bam  newSampleForNot12878"));

        final Set<String> na12878ReadGroups = new HashSet<>();
        final SAMFileReader inputBamReader = new SAMFileReader(new File(privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.HEADERONLY.bam"));
        for ( final SAMReadGroupRecord readGroup : inputBamReader.getFileHeader().getReadGroups() ) {
            na12878ReadGroups.add(readGroup.getId());
        }
        inputBamReader.close();

        final WalkerTestSpec spec = new WalkerTestSpec(" -T PrintReads" +
                                                       " -R " + b37KGReference +
                                                       " -I " + privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.HEADERONLY.bam" +
                                                       " -I " + privateTestDir + "CEUTrio.HiSeq.WGS.b37.READ_GROUP_COLLISIONS_WITH_NA12878.HEADERONLY.bam" +
                                                       " --sample_rename_mapping_file " + sampleRenameMapFile.getAbsolutePath() +
                                                       " -o %s",
                                                       1, Arrays.asList(""))// No MD5s; we only want to check the read groups

        final File outputBam = executeTest("testOnTheFlySampleRenamingWithReadGroupCollisions", spec).first.get(0);
        final SAMFileReader outputBamReader = new SAMFileReader(outputBam);

        int totalReadGroupsSeen = 0;
        for ( final SAMReadGroupRecord readGroup : outputBamReader.getFileHeader().getReadGroups() ) {
            String expectedSampleName = "";
            if ( na12878ReadGroups.contains(readGroup.getId()) ) {
                expectedSampleName = "newSampleFor12878";
            }
            else {
                expectedSampleName = "newSampleForNot12878";
            }

            Assert.assertEquals(readGroup.getSample(), expectedSampleName,
                                String.format("Wrong sample for read group %s after on-the-fly renaming", readGroup.getId()));
            totalReadGroupsSeen++;
        }

        Assert.assertEquals(totalReadGroupsSeen, na12878ReadGroups.size() * 2, "Wrong number of read groups encountered in output bam file");

        outputBamReader.close();
    }

    // On-the-fly sample renaming test case: a multi-sample bam (this should generate a UserException)
    @Test
    public void testOnTheFlySampleRenamingWithMultiSampleBam() throws IOException {
        final File sampleRenameMapFile = createTestSampleRenameMapFile(
                Arrays.asList(privateTestDir + "CEUTrio.HiSeq.WGS.b37.MERGED.HEADERONLY.bam  myNewSampleName"));

        final WalkerTestSpec spec = new WalkerTestSpec(" -T PrintReads" +
                                                       " -R " + b37KGReference +
                                                       " -I " + privateTestDir + "CEUTrio.HiSeq.WGS.b37.MERGED.HEADERONLY.bam" +
                                                       " --sample_rename_mapping_file " + sampleRenameMapFile.getAbsolutePath() +
                                                       " -o %s",
                                                       1,
                                                       UserException.class); // expecting a UserException here

        executeTest("testOnTheFlySampleRenamingWithMultiSampleBam", spec);
    }

    // On-the-fly sample renaming test case: ensure that walkers can see the remapped sample names in individual reads
    @Test
    public void testOnTheFlySampleRenamingVerifyWalkerSeesNewSamplesInReads() throws IOException {
        final File sampleRenameMapFile = createTestSampleRenameMapFile(
                Arrays.asList(privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam  myNewSampleName"));

        final WalkerTestSpec spec = new WalkerTestSpec(" -T OnTheFlySampleRenamingVerifyingTestWalker" +
                                                       " -R " + b37KGReference +
                                                       " -I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam" +
                                                       " --sample_rename_mapping_file " + sampleRenameMapFile.getAbsolutePath() +
                                                       " --newSampleName myNewSampleName" +
                                                       " -L 20:10000000-10001000",
                                                       1, Arrays.asList(""));

        // Test is a success if our custom walker doesn't throw an exception
        executeTest("testOnTheFlySampleRenamingVerifyWalkerSeesNewSamplesInReads", spec);
    }

    @Test
    public void testOnTheFlySampleRenamingSingleSampleVCF() throws IOException {
        final File sampleRenameMapFile = createTestSampleRenameMapFile(
                Arrays.asList(privateTestDir + "NA12878.WGS.b37.chr20.firstMB.vcf  newSampleForNA12878"));

        final WalkerTestSpec spec = new WalkerTestSpec(" -T CombineVariants" +
                " -R " + b37KGReference +
                " -V " + privateTestDir + "NA12878.WGS.b37.chr20.firstMB.vcf" +
                " --sample_rename_mapping_file " + sampleRenameMapFile.getAbsolutePath() +
                " -o %s",
                1,
                Arrays.asList("")); // No MD5s -- we will inspect the output file manually

        final File outputVCF = executeTest("testOnTheFlySampleRenamingSingleSampleVCF", spec).first.get(0);
        verifySampleRenaming(outputVCF, "newSampleForNA12878");
    }

    private void verifySampleRenaming( final File outputVCF, final String newSampleName ) throws IOException {
        final Pair<VCFHeader, GATKVCFUtils.VCIterable<LineIterator>> headerAndVCIter = GATKVCFUtils.readAllVCs(outputVCF, new VCFCodec());
        final VCFHeader header = headerAndVCIter.getFirst();
        final GATKVCFUtils.VCIterable<LineIterator> iter = headerAndVCIter.getSecond();

        // Verify that sample renaming occurred at both the header and record levels (checking only the first 10 records):

        Assert.assertEquals(header.getGenotypeSamples().size(), 1, "Wrong number of samples in output vcf header");
        Assert.assertEquals(header.getGenotypeSamples().get(0), newSampleName, "Wrong sample name in output vcf header");

        int recordCount = 0;
        while ( iter.hasNext() && recordCount < 10 ) {
            final VariantContext vcfRecord = iter.next();
            Assert.assertEquals(vcfRecord.getSampleNames().size(), 1, "Wrong number of samples in output vcf record");
            Assert.assertEquals(vcfRecord.getSampleNames().iterator().next(), newSampleName, "Wrong sample name in output vcf record");
            recordCount++;
        }
    }

    @Test
    public void testOnTheFlySampleRenamingVerifyWalkerSeesNewSamplesInVCFRecords() throws Exception {
        final File sampleRenameMapFile = createTestSampleRenameMapFile(
                Arrays.asList(privateTestDir + "samplerenametest_single_sample_gvcf.vcf    FOOSAMPLE"));

        final WalkerTestSpec spec = new WalkerTestSpec(" -T OnTheFlySampleRenamingVerifyingRodWalker" +
                " -R " + hg19Reference +
                " -V " + privateTestDir + "samplerenametest_single_sample_gvcf.vcf" +
                " --sample_rename_mapping_file " + sampleRenameMapFile.getAbsolutePath() +
                " --expectedSampleName FOOSAMPLE" +
                " -o %s",
                1,
                Arrays.asList("")); // No MD5s -- custom walker will throw an exception if there's a problem

        executeTest("testOnTheFlySampleRenamingVerifyWalkerSeesNewSamplesInVCFRecords", spec);
    }

    @Test
    public void testOnTheFlySampleRenamingMultiSampleVCF() throws Exception {
        final File sampleRenameMapFile = createTestSampleRenameMapFile(
                Arrays.asList(privateTestDir + "vcf/vcfWithGenotypes.vcf  badSample"));

        final WalkerTestSpec spec = new WalkerTestSpec(" -T CombineVariants" +
                " -R " + b37KGReference +
                " -V " + privateTestDir + "vcf/vcfWithGenotypes.vcf" +
                " --sample_rename_mapping_file " + sampleRenameMapFile.getAbsolutePath() +
                " -o %s",
                1,
                UserException.class); // expecting a UserException here

        executeTest("testOnTheFlySampleRenamingMultiSampleVCF", spec);
    }

    @Test
    public void testOnTheFlySampleRenamingSitesOnlyVCF() throws Exception {
        final File sampleRenameMapFile = createTestSampleRenameMapFile(
                Arrays.asList(privateTestDir + "vcf/vcfWithoutGenotypes.vcf  badSample"));

        final WalkerTestSpec spec = new WalkerTestSpec(" -T CombineVariants" +
                " -R " + b37KGReference +
                " -V " + privateTestDir + "vcf/vcfWithoutGenotypes.vcf" +
                " --sample_rename_mapping_file " + sampleRenameMapFile.getAbsolutePath() +
                " -o %s",
                1,
                UserException.class); // expecting a UserException here

        executeTest("testOnTheFlySampleRenamingSitesOnlyVCF", spec);
    }

    private File createTestSampleRenameMapFile( final List<String> contents ) throws IOException {
        final File mapFile = createTempFile("TestSampleRenameMapFile", ".tmp");
        final PrintWriter writer = new PrintWriter(mapFile);

        for ( final String line : contents ) {
            writer.println(line);
        }
        writer.close();

        return mapFile;
    }

    public static class OnTheFlySampleRenamingVerifyingTestWalker extends ReadWalker<Integer, Integer> {
        @Argument(fullName = "newSampleName", shortName = "newSampleName", doc = "", required = true)
        String newSampleName = null;

        public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
            if ( ! newSampleName.equals(read.getReadGroup().getSample()) ) {
                throw new IllegalStateException(String.format("Encountered read with the wrong sample name. Expected %s found %s",
                                                              newSampleName, read.getReadGroup().getSample()));
            }

            return 1;
        }

        public Integer reduceInit() { return 0; }
        public Integer reduce(Integer value, Integer sum) { return value + sum; }
    }

    public static class OnTheFlySampleRenamingVerifyingRodWalker extends RodWalker<Integer, Integer> {
        @Argument(fullName = "expectedSampleName", shortName = "expectedSampleName", doc = "", required = true)
        String expectedSampleName = null;

        @Output
        PrintStream out;

        @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
        public RodBinding<VariantContext> variants;

        public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
            if ( tracker == null ) {
                return 0;
            }

            for ( final VariantContext vc : tracker.getValues(variants, context.getLocation()) ) {
                if ( vc.getSampleNames().size() != 1 ) {
                    throw new IllegalStateException("Encountered a vcf record with num samples != 1");
                }

                final String actualSampleName = vc.getSampleNames().iterator().next();
                if ( ! expectedSampleName.equals(actualSampleName)) {
                    throw new IllegalStateException(String.format("Encountered vcf record with wrong sample name. Expected %s found %s",
                                                                  expectedSampleName, actualSampleName));
                }
            }

            return 1;
        }

        public Integer reduceInit() {
            return 0;
        }

        public Integer reduce(Integer counter, Integer sum) {
            return counter + sum;
        }
    }

    // --------------------------------------------------------------------------------
    //
    // Test output file-specific options
    //
    // --------------------------------------------------------------------------------

    //Returns the output file
    private File testBAMFeatures(final String args, final String md5) {
        WalkerTestSpec spec = new WalkerTestSpec("-T PrintReads -R " + b37KGReference +
                " -I " + privateTestDir + "NA20313.highCoverageRegion.bam"
                + " --no_pg_tag -o %s " + args,
                1, Arrays.asList(".bam"), Arrays.asList(md5));
        return executeTest("testBAMFeatures: "+args, spec).first.get(0);
    }

    @Test
    public void testSAMWriterFeatures() {
        testBAMFeatures("-compress 0", "bb4b55b1f80423970bb9384cbf0d8793");
        testBAMFeatures("-compress 9", "b85ee1636d62e1bb8ed65a245c307167");
        testBAMFeatures("-simplifyBAM", "38f9c30a27dfbc085a2ff52a1617d579");

        //Validate MD5
        final String expectedMD5 = "6627b9ea33293a0083983feb94948c1d";
        final File md5Target = testBAMFeatures("--generate_md5", expectedMD5);
        final File md5File = new File(md5Target.getAbsoluteFile() + ".md5");
        md5File.deleteOnExit();
        Assert.assertTrue(md5File.exists(), "MD5 wasn't created");
        try {
            String md5 = new BufferedReader(new FileReader(md5File)).readLine();
            Assert.assertEquals(md5, expectedMD5, "Generated MD5 doesn't match expected");
        } catch (IOException e) {
            Assert.fail("Can't parse MD5 file", e);
        }

        //Validate that index isn't created
        final String unindexedBAM = testBAMFeatures("--disable_bam_indexing", expectedMD5).getAbsolutePath();
        Assert.assertTrue(!(new File(unindexedBAM+".bai").exists()) &&
                          !(new File(unindexedBAM.replace(".bam", ".bai")).exists()),
                          "BAM index was created even though it was disabled");
    }

    private void testVCFFeatures(final String args, final String md5) {
        WalkerTestSpec spec = new WalkerTestSpec("-T SelectVariants -R " + b37KGReference +
                " -V " + privateTestDir + "CEUtrioTest.vcf"
                + " --no_cmdline_in_header -o %s " + args,
                1, Arrays.asList(md5));
        executeTest("testVCFFeatures: "+args, spec);
    }

    private void testVCFFormatHandling(final boolean writeFullFormat, final String md5) {
        WalkerTestSpec spec = new WalkerTestSpec("-T SelectVariants -R " + b37KGReference +
                " -V " + privateTestDir + "ILLUMINA.wex.broad_phase2_baseline.20111114.both.exome.genotypes.1000.vcf"
                + " --no_cmdline_in_header -o %s "
                + " --fullyDecode " //Without this parameter, the FORMAT fields will be emitted unchanged.  Oops
                + (writeFullFormat ? "-writeFullFormat" : "") ,
                1, Arrays.asList(md5));
        executeTest("testVCFFormatHandling: "+(writeFullFormat ? "Untrimmed" : "Trimmed"), spec);
    }

    @Test
    public void testVCFWriterFeatures() {
        testVCFFeatures("--sites_only", "94bf1f2c0946e933515e4322323a5716");
        testVCFFeatures("--bcf", "03f2d6988f54a332da48803c78f9c4b3");
        testVCFFormatHandling(true, "2b0fa660b0cef4b0f45a10febb453b6c");
        testVCFFormatHandling(false, "5960311fdd9ee6db88587efaaf4055a0");
    }
}
TOP

Related Classes of org.broadinstitute.gatk.engine.EngineFeaturesIntegrationTest$EngineErrorHandlingTestProvider

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.
reate', 'UA-20639858-1', 'auto'); ga('send', 'pageview');