Package picard.illumina

Source Code of picard.illumina.CollectIlluminaBasecallingMetrics$IlluminaMetricCounts

/*
* The MIT License
*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

package picard.illumina;

import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.programgroups.Illumina;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import picard.illumina.parser.ClusterData;import picard.illumina.parser.IlluminaDataProvider;import picard.illumina.parser.IlluminaDataProviderFactory;import picard.illumina.parser.IlluminaDataType;import picard.illumina.parser.ReadStructure;import picard.illumina.parser.readers.BclQualityEvaluationStrategy;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.Histogram;
import picard.util.TabbedTextFileWithHeaderParser;
import htsjdk.samtools.util.StringUtil;

import java.io.File;
import java.lang.Comparable;import java.lang.Double;import java.lang.Exception;import java.lang.Integer;import java.lang.Math;import java.lang.Override;import java.lang.String;import java.lang.StringBuilder;import java.text.DecimalFormat;
import java.util.SortedMap;
import java.util.TreeMap;

/***
*  A Command line tool to collect Illumina Basecalling metrics for a sequencing run
*  Requires a Lane and an input file of Barcodes to expect.
*  Outputs metrics:
*    *  Mean Clusters Per Tile
*    *  Standard Deviation of Clusters Per Tile
*    *  Mean Pf Clusters Per Tile
*    *  Standard Deviation of Pf Clusters Per Tile
*    *  Mean Percentage of Pf Clusters Per Tile
*    *  Standard Deviation of Percentage of Pf Clusters Per Tile
*/
@CommandLineProgramProperties(
        usage = CollectIlluminaBasecallingMetrics.USAGE,
        usageShort = CollectIlluminaBasecallingMetrics.USAGE,
        programGroup = Illumina.class
)
public class CollectIlluminaBasecallingMetrics extends CommandLineProgram {
    //Command Line Arguments
    static final String USAGE = "Given an Illumina basecalling and a lane, produces per-lane-barcode basecalling metrics";
   
    @Option(doc="The Illumina basecalls output directory from which data are read", shortName="B")
    public File BASECALLS_DIR;

    @Option(doc="The lane whose data will be read", shortName = StandardOptionDefinitions.LANE_SHORT_NAME)
    public Integer LANE;

    // TODO: No longer optional after old workflows are through
    @Option(doc="The file containing barcodes to expect from the run - barcodeData.#",shortName=StandardOptionDefinitions.INPUT_SHORT_NAME, optional = true)
    public File INPUT;

    @Option(doc=ReadStructure.PARAMETER_DOC, shortName="RS")
    public String READ_STRUCTURE;

    @Option(doc="The file to which the collected metrics are written", shortName= StandardOptionDefinitions.OUTPUT_SHORT_NAME, optional = true)
    public File OUTPUT;

    private int barcodeLength = 0;
    private String unmatched_barcode;
    private final SortedMap<String, IlluminaMetricCounts> barcodeToMetricCounts;

    private static final String BARCODE_NAME_COLUMN = "barcode_name";
    private static final String BARCODE_SEQUENCE_COLUMN_NAME_STUB = "barcode_sequence_";

    public CollectIlluminaBasecallingMetrics() {
        this.barcodeToMetricCounts = new TreeMap<String, IlluminaMetricCounts>();
    }

    @Override
    protected int doWork() {
        // File and Directory Validation
        IOUtil.assertDirectoryIsReadable(BASECALLS_DIR);
        if (OUTPUT == null) OUTPUT = new File(BASECALLS_DIR, String.format("LANE%s_basecalling_metrics", LANE));
        IOUtil.assertFileIsWritable(OUTPUT);

        final IlluminaDataProviderFactory factory;
        final ReadStructure readStructure = new ReadStructure(READ_STRUCTURE);
        final BclQualityEvaluationStrategy bclQualityEvaluationStrategy = new BclQualityEvaluationStrategy(BclQualityEvaluationStrategy.ILLUMINA_ALLEGED_MINIMUM_QUALITY);

        if (INPUT == null) {
            // TODO: Legacy support. Remove when INPUT is required, after all old workflows are through
            factory = new IlluminaDataProviderFactory(BASECALLS_DIR, LANE, readStructure, bclQualityEvaluationStrategy,
                    IlluminaDataType.PF, IlluminaDataType.Position);
        } else {
            // Grab expected barcode data from barcodeData.<LANE>
            IOUtil.assertFileIsReadable(INPUT);
            final TabbedTextFileWithHeaderParser barcodesParser = new TabbedTextFileWithHeaderParser(INPUT);
            for (final TabbedTextFileWithHeaderParser.Row row : barcodesParser) {
                final String barcodeName = row.getField(BARCODE_NAME_COLUMN);
                final StringBuilder barcode = new StringBuilder();
                for (int i = 1; i <= readStructure.barcodes.length(); i++) {
                    barcode.append(row.getField(BARCODE_SEQUENCE_COLUMN_NAME_STUB + i));
                    if (barcodeLength == 0) barcodeLength = barcode.length();
                }

                // Only add the barcode to the hash if it has sequences. For libraries
                // that don't have barcodes this won't be set in the file.
                if (barcode.length() > 0) {
                    barcodeToMetricCounts.put(barcode.toString(), new IlluminaMetricCounts(barcode.toString(), barcodeName, LANE));
                }
            }

            factory = barcodeToMetricCounts.isEmpty()
                    ? new IlluminaDataProviderFactory(
                        BASECALLS_DIR,
                        LANE,
                        readStructure,
                        bclQualityEvaluationStrategy,
                        IlluminaDataType.PF,
                        IlluminaDataType.Position)
                    : new IlluminaDataProviderFactory(
                        BASECALLS_DIR,
                        LANE,
                        readStructure,
                        bclQualityEvaluationStrategy,
                        IlluminaDataType.PF,
                        IlluminaDataType.Position,
                        IlluminaDataType.Barcodes);
        }

        unmatched_barcode = StringUtil.repeatCharNTimes('N', barcodeLength);

        //Initialize data provider, iterate over clusters, and collect statistics
        final IlluminaDataProvider provider = factory.makeDataProvider();

        while (provider.hasNext()) {
            final ClusterData cluster = provider.next();
            addCluster(cluster);
        }

        onComplete();
        return 0;
    }

    /***
     * Process new cluster of Illumina data - increment a running counter of data
     */
    private void addCluster(final ClusterData cluster) {
        //compute hash of Barcode and Lane for key
        String barcode = cluster.getMatchedBarcode();
        if (barcode == null) barcode = unmatched_barcode;

        //increment counts
        IlluminaMetricCounts counters =  barcodeToMetricCounts.get(barcode);
        if (counters == null) {
             counters = new IlluminaMetricCounts(barcode,null,LANE);
             barcodeToMetricCounts.put(barcode, counters);
        }
        final int tileNumber = cluster.getTile();
        counters.incrementClusterCount(tileNumber,cluster.isPf());
    }

    /**
     * Handles completion of metric collection. Metrics are computed from counts of data and written out to a file.
     */
    private void onComplete() {
        try {
            final MetricsFile<IlluminaBasecallingMetrics, Comparable<?>> file = getMetricsFile();
            final IlluminaMetricCounts allLaneCounts = new IlluminaMetricCounts(null, null, LANE);
            for (final String s : barcodeToMetricCounts.keySet()) {
                final IlluminaMetricCounts counts = barcodeToMetricCounts.get(s);
                counts.addMetricsToFile(file);
                allLaneCounts.addIlluminaMetricCounts(counts);
            }
            if (! barcodeToMetricCounts.keySet().contains("")) allLaneCounts.addMetricsToFile(file)// detect non-indexed case
            file.write(OUTPUT);
        } catch (final Exception ex) {
            throw new PicardException("Error writing output file " + OUTPUT.getPath(), ex);
        }
    }

    public static void main(final String[] argv) {
        new CollectIlluminaBasecallingMetrics().instanceMainWithExit(argv);
    }

    /***
     * This class manages counts of Illumina Basecalling data on a Per Barcode Per Lane basis.  Cluster and PFCluster
     * counts are stored per tile number.
     */
    private class IlluminaMetricCounts {
        /*** Stores counts of clusters found for a specific Barcode-Lane combination across all tiles.  Key = Tile Number, Value = count of clusters***/
        private final Histogram<Integer> tileToClusterHistogram;
        /*** Stores counts of pf clusters found for a specific Barcode-Lane combination across all tiles.  Key = Tile Number, Value = count of clusters***/
        private final Histogram<Integer> tileToPfClusterHistogram;
        final IlluminaBasecallingMetrics metrics;

        public IlluminaMetricCounts(final String barcode, final String barcodeName, final Integer laneNumber) {
            this.tileToClusterHistogram = new Histogram<Integer>();
            this.tileToPfClusterHistogram = new Histogram<Integer>();
            this.metrics = new IlluminaBasecallingMetrics();
            this.metrics.MOLECULAR_BARCODE_SEQUENCE_1 = barcode;
            this.metrics.MOLECULAR_BARCODE_NAME = barcodeName;
            this.metrics.LANE = Integer.toString(laneNumber);
        }

        /*  Increments cluster count by 1 for a given tile number */
        public void incrementClusterCount(final int tileNumber, final boolean isPf) {
            incrementClusterCount(tileNumber,1d, isPf);
        }

        /*  Increments cluster count by an amount for a given tile number */
        public void incrementClusterCount(final int tileNumber, final double incrementAmount, final boolean isPf) {
            incrementClusterCount(tileNumber, incrementAmount, (isPf ? 1d : 0d));
        }

        /*  Increments cluster count by an amount for a given tile number */
        public void incrementClusterCount(final Integer tileNumber, final double incrementAmount, final double pfIncrementAmount) {
            tileToClusterHistogram.increment(tileNumber, incrementAmount);
            tileToPfClusterHistogram.increment(tileNumber, pfIncrementAmount);
        }

        /* Handles calculating final metrics and updating the metric object */
        private void onComplete() {
            final double meanClustersPerTile =  tileToClusterHistogram.getMeanBinSize();
            metrics.MEAN_CLUSTERS_PER_TILE = Math.round(meanClustersPerTile);
            metrics.SD_CLUSTERS_PER_TILE = Math.round(tileToClusterHistogram.getStandardDeviationBinSize(meanClustersPerTile));

            final double meanPfClustersPerTile =  tileToPfClusterHistogram.getMeanBinSize();
            metrics.MEAN_PF_CLUSTERS_PER_TILE = Math.round(meanPfClustersPerTile);
            metrics.SD_PF_CLUSTERS_PER_TILE = Math.round(tileToPfClusterHistogram.getStandardDeviationBinSize(meanPfClustersPerTile));

            final DecimalFormat decFormat = new DecimalFormat("#.##");
            final Histogram<Integer> laneToPctPfClusterHistogram = tileToPfClusterHistogram.divideByHistogram(tileToClusterHistogram);
            final double meanPctPfClustersPerTile = laneToPctPfClusterHistogram.getMeanBinSize();
            metrics.MEAN_PCT_PF_CLUSTERS_PER_TILE = (Double.isNaN(meanPctPfClustersPerTile) 0 : Double.valueOf(decFormat.format(meanPctPfClustersPerTile * 100)));
            metrics.SD_PCT_PF_CLUSTERS_PER_TILE = Double.valueOf(decFormat.format(laneToPctPfClusterHistogram.getStandardDeviationBinSize(meanPctPfClustersPerTile) * 100));

            metrics.TOTAL_CLUSTERS = (long) this.tileToClusterHistogram.getSumOfValues();
            metrics.PF_CLUSTERS    = (long) this.tileToPfClusterHistogram.getSumOfValues();
           
            final ReadStructure readStructure = new ReadStructure(READ_STRUCTURE);
            int templateBaseCountPerCluster = 0;
            for (int i = 0; i < readStructure.templates.length(); i++) templateBaseCountPerCluster += readStructure.templates.get(i).length;
            metrics.TOTAL_READS = metrics.TOTAL_CLUSTERS * readStructure.templates.length();
            metrics.PF_READS = metrics.PF_CLUSTERS * readStructure.templates.length();
            metrics.TOTAL_BASES = metrics.TOTAL_CLUSTERS * templateBaseCountPerCluster;
            metrics.PF_BASES = metrics.PF_CLUSTERS * templateBaseCountPerCluster;
           
        }

        /* Computes final metric based on data counts and writes to output metric file */
        public void addMetricsToFile(final MetricsFile<IlluminaBasecallingMetrics, Comparable<?>> file) {
            onComplete();
            file.addMetric(metrics);
        }

        /*  Merges data from another IlluminaMetricCount object into current one.*/
        public void addIlluminaMetricCounts(final IlluminaMetricCounts counts) {
            this.tileToClusterHistogram.addHistogram(counts.tileToClusterHistogram);
            this.tileToPfClusterHistogram.addHistogram(counts.tileToPfClusterHistogram);
        }
    }
}
TOP

Related Classes of picard.illumina.CollectIlluminaBasecallingMetrics$IlluminaMetricCounts

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.