Package picard.illumina

Source Code of picard.illumina.ExtractIlluminaBarcodesTest

/*
* The MIT License
*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.illumina;

import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.IOUtil;
import org.testng.Assert;
import org.testng.annotations.AfterTest;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import picard.cmdline.CommandLineProgramTest;
import picard.illumina.parser.ClusterData;
import picard.illumina.parser.IlluminaDataProvider;
import picard.illumina.parser.IlluminaDataProviderFactory;
import picard.illumina.parser.IlluminaDataType;
import picard.illumina.parser.ReadStructure;
import picard.illumina.parser.readers.BclQualityEvaluationStrategy;
import picard.util.BasicInputParser;

import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
* @author alecw@broadinstitute.org
*/
public class ExtractIlluminaBarcodesTest extends CommandLineProgramTest {
    private static final File SINGLE_DATA_DIR = new File("testdata/picard/illumina/25T8B25T/Data/Intensities/BaseCalls");
    private static final File DUAL_DATA_DIR = new File("testdata/picard/illumina/25T8B8B25T/Data/Intensities/BaseCalls");
    private static final String[] BARCODES = {
            "CAACTCTC",
            "CAACTCTG", // This one is artificial -- one edit away from the first one
            "ACAGGTAT",
            "GACCGTTG",
            "ATTATCAA",
            "TGCTGCTG",
            "AACAATGG",
            "TGTAATCA",
            "GCCGTCGA",
            "GTCCACAG",
            "TTGTCTAT",
            "GTGGAGAC",
            "TTGCAAAT"
    };

    private File basecallsDir;
    private File dual;
    private File qual;

    public String getCommandLineProgramName() {
        return ExtractIlluminaBarcodes.class.getSimpleName();
    }

    @BeforeTest
    private void setUp() throws Exception {
        basecallsDir = File.createTempFile("eib.", ".tmp");
        Assert.assertTrue(basecallsDir.delete());
        Assert.assertTrue(basecallsDir.mkdir());
        IOUtil.copyDirectoryTree(SINGLE_DATA_DIR, basecallsDir);
        dual = File.createTempFile("eib_dual", ".tmp");
        Assert.assertTrue(dual.delete());
        Assert.assertTrue(dual.mkdir());
        IOUtil.copyDirectoryTree(DUAL_DATA_DIR, dual);
        qual = File.createTempFile("eib_qual", ".tmp");
        Assert.assertTrue(qual.delete());
        Assert.assertTrue(qual.mkdir());
        IOUtil.copyDirectoryTree(DUAL_DATA_DIR, qual);
    }

    @AfterTest
    private void tearDown() {
        IOUtil.deleteDirectoryTree(basecallsDir);
        IOUtil.deleteDirectoryTree(dual);
        IOUtil.deleteDirectoryTree(qual);
    }

    @Test
    public void testSingleEndWithBarcodeAtStart() throws Exception {
        final MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric, Integer> metricsFile = runIt(1, "8B25T");
        Assert.assertEquals(metricsFile.getMetrics().get(11).PERFECT_MATCHES, 1);
    }

    @Test
    public void testSingleEndWithBarcodeAtEnd() throws Exception {
        final MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric, Integer> metricsFile = runIt(1, "25T8B");
        Assert.assertEquals(metricsFile.getMetrics().get(0).PERFECT_MATCHES, 5);
    }

    @Test
    public void testPairedEndWithBarcodeOnFirstEnd() throws Exception {
        final MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric, Integer> metricsFile = runIt(1, "25T8B25T");
        Assert.assertEquals(metricsFile.getMetrics().get(0).PERFECT_MATCHES, 5);
    }

    @Test
    public void testPairedEndWithBarcodeOnSecondEnd() throws Exception {
        final MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric, Integer> metricsFile = runIt(1, "25T25T8B");
        Assert.assertEquals(metricsFile.getMetrics().get(12).PERFECT_MATCHES, 1);
    }

    @Test
    public void testNonWritableOutputFile() throws Exception {
        final File existingFile = new File(basecallsDir, "s_1_1101_barcode.txt.gz");
        try {
            existingFile.setReadOnly();
            final String readStructure = "25T8B25T";
            final int lane = 1;

            final File metricsFile = File.createTempFile("eib.", ".metrics");
            metricsFile.deleteOnExit();

            final List<String> args = new ArrayList<String>(Arrays.asList(
                    "BASECALLS_DIR=" + basecallsDir.getPath(),
                    "LANE=" + lane,
                    "READ_STRUCTURE=" + readStructure,
                    "METRICS_FILE=" + metricsFile.getPath(),
                    "COMPRESS_OUTPUTS=true"
            ));
            for (final String barcode : BARCODES) {
                args.add("BARCODE=" + barcode);
            }
            Assert.assertEquals(runPicardCommandLine(args), 4);
        }
        finally {
            existingFile.setWritable(true);
        }

    }

    /**
     * 4 cases tested:
     * * exact match to ACAGTG
     * * inexact match within threshold to TGACCA
     * * inexact match not within threshold to TGACCA
     * * inexact match where the next match is too close to ACAGTG
     * @throws Exception
     */
    @Test
    public void testBarcodeMatching() throws Exception {
        final int lane = 1;
        final int barcodePosition = 26;
        final MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric, Integer> metricsFile = runIt(lane, "25T8B25T");

        ExtractIlluminaBarcodes.BarcodeMetric metricOne = null;
        ExtractIlluminaBarcodes.BarcodeMetric metricTwo = null;
        ExtractIlluminaBarcodes.BarcodeMetric metricNoMatch = null;
        for (final ExtractIlluminaBarcodes.BarcodeMetric metric : metricsFile.getMetrics()) {
            if (metric.BARCODE.equals(BARCODES[0])) {
                metricOne = metric;
            } else if (metric.BARCODE.equals(BARCODES[2])) {
                metricTwo = metric;
            } else if (metric.BARCODE.equals("NNNNNNNN")) {
                metricNoMatch = metric;
            }
        }
        Assert.assertEquals(metricOne.PERFECT_MATCHES, 5);
        Assert.assertEquals(metricOne.ONE_MISMATCH_MATCHES, 0);
        Assert.assertEquals(metricOne.PF_READS, 3);
        Assert.assertEquals(metricOne.READS, 5);

        // one inexact match
        Assert.assertEquals(metricTwo.READS, 4);
        Assert.assertEquals(metricTwo.ONE_MISMATCH_MATCHES, 0);

        Assert.assertEquals(metricNoMatch.READS, 140);
        Assert.assertEquals(metricNoMatch.PF_READS, 112);

        // Check the barcode files themselves
        final File[] barcodeFiles = IOUtil.getFilesMatchingRegexp(basecallsDir, "s_" + lane + "_\\d{4}_barcode.txt");
        Arrays.sort(barcodeFiles);

        final BasicInputParser barcodeParser = new BasicInputParser(true, barcodeFiles);

        // Exact match
        String[] illuminaFields = barcodeParser.next();
        Assert.assertEquals(illuminaFields[1], "Y");
        Assert.assertEquals(illuminaFields[2], "CAACTCTC");

        // Inexact match
        illuminaFields = barcodeParser.next();
        Assert.assertEquals(illuminaFields[1], "Y");
        Assert.assertEquals(illuminaFields[2], "ACAGGTAT");

        // Too many mismatches
        illuminaFields = barcodeParser.next();
        Assert.assertEquals(illuminaFields[1], "N");

        barcodeParser.close();

        // Tack on test of barcode-informed Illumina Basecall parsing
        final ReadStructure rs = new ReadStructure("25T8B25T");
        final IlluminaDataProviderFactory factory = new IlluminaDataProviderFactory(basecallsDir, lane, rs,
                new BclQualityEvaluationStrategy(BclQualityEvaluationStrategy.ILLUMINA_ALLEGED_MINIMUM_QUALITY),
                IlluminaDataType.BaseCalls, IlluminaDataType.QualityScores, IlluminaDataType.Barcodes);
        testParsing(factory, rs, metricOne, barcodePosition);
    }

    @Test
    public void testDualBarcodes() throws Exception {
        final File metricsFile = File.createTempFile("dual.", ".metrics");
        metricsFile.deleteOnExit();

        final String[] args = new String[] {
                "BASECALLS_DIR=" + dual.getAbsolutePath(),
                "LANE=" + 1,
                "METRICS_FILE=" + metricsFile.getPath(),
                "READ_STRUCTURE=" + "25T8B8B25T",
                "BARCODE=" + "CAATAGTCCGACTCTC"
        };

        Assert.assertEquals(runPicardCommandLine(args), 0);
        final MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric,Integer> result =  new MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric,Integer>();
        result.read(new FileReader(metricsFile));
        Assert.assertEquals(result.getMetrics().get(0).PERFECT_MATCHES, 1, "Got wrong number of perfect matches");
        Assert.assertEquals(result.getMetrics().get(0).ONE_MISMATCH_MATCHES, 0, "Got wrong number of one-mismatch matches");
    }

    /**
     *  Testing the quality thresholding. Looking at a single barcode (ACAGTG) with a min quality of 25 and no mismatches
     */
    @Test(dataProvider = "qualityBarcodeData")
    public void testQualityBarcodes(final int quality,
                                    final int maxMismatches, final int perfectMatches, final int oneMismatch,
                                    final String testName) throws Exception {
        final File metricsFile = File.createTempFile("qual.", ".metrics");
        metricsFile.deleteOnExit();

        final String[] args = new String[] {
                "BASECALLS_DIR=" + qual.getPath(),
                "LANE=" + 1,
                "READ_STRUCTURE=25T8B25T",
                "METRICS_FILE=" + metricsFile.getPath(),
                "MINIMUM_BASE_QUALITY=" + quality,
                "MAX_MISMATCHES=" + maxMismatches,
                "BARCODE=CAATAGTC"
        };

        Assert.assertEquals(runPicardCommandLine(args), 0);
        final MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric,Integer> result =  new MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric,Integer>();
        result.read(new FileReader(metricsFile));
        Assert.assertEquals(result.getMetrics().get(0).PERFECT_MATCHES, perfectMatches, "Got wrong number of perfect matches for test: '" + testName + "'");
        Assert.assertEquals(result.getMetrics().get(0).ONE_MISMATCH_MATCHES, oneMismatch, "Got wrong number of one-mismatch matches for test: '" + testName + "'");
    }

    @DataProvider(name = "qualityBarcodeData")
    public Object[][] getQualityTestData() {
        return new Object[][] {
                {16, 0, 1, 0, "Barcode has good quality, 1 match"},
                {25, 0, 0, 0, "Barcode has quality failures, no matches"}
        };
    }

    private void testParsing(final IlluminaDataProviderFactory factory, final ReadStructure readStructure, final ExtractIlluminaBarcodes.BarcodeMetric metricACAGTG, final int barcodePosition) {

        int numReads = 0;

        final IlluminaDataProvider dataProvider = factory.makeDataProvider();
        while (dataProvider.hasNext()) {
            final ClusterData cluster = dataProvider.next();

            if(metricACAGTG.BARCODE.equals(cluster.getMatchedBarcode())) {
                ++numReads;
            }

            Assert.assertEquals(cluster.getRead(readStructure.templates.getIndices()[0]).getQualities().length, barcodePosition - 1);
            Assert.assertEquals(cluster.getRead(readStructure.templates.getIndices()[0]).getBases().length, barcodePosition - 1);
        }
        Assert.assertEquals(numReads, metricACAGTG.READS);
        dataProvider.close();
    }

    private MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric, Integer> runIt(final int lane, final String readStructure)
            throws Exception {
        final File metricsFile = File.createTempFile("eib.", ".metrics");
        metricsFile.deleteOnExit();

        final List<String> args = new ArrayList<String>(Arrays.asList(
                "BASECALLS_DIR=" + basecallsDir.getPath(),
                "LANE=" + lane,
                "READ_STRUCTURE=" + readStructure,
                "METRICS_FILE=" + metricsFile.getPath()
        ));
        for (final String barcode : BARCODES) {
            args.add("BARCODE=" + barcode);
        }
        return runIt(args, metricsFile);
    }

    private MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric, Integer> runIt(final List<String> args, final File metricsFile) throws Exception {
        // Generate _barcode.txt files and metrics file.
        Assert.assertEquals(runPicardCommandLine(args), 0);

        final MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric,Integer> retval =  new MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric,Integer>();
        retval.read(new FileReader(metricsFile));
        return retval;
    }
}
TOP

Related Classes of picard.illumina.ExtractIlluminaBarcodesTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.