Package org.broadinstitute.gatk.tools.walkers.qc

Source Code of org.broadinstitute.gatk.tools.walkers.qc.CountRODs$Datum

/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package org.broadinstitute.gatk.tools.walkers.qc;

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.tribble.Feature;
import org.broadinstitute.gatk.utils.commandline.Argument;
import org.broadinstitute.gatk.utils.commandline.Input;
import org.broadinstitute.gatk.utils.commandline.Output;
import org.broadinstitute.gatk.utils.commandline.RodBinding;
import org.broadinstitute.gatk.engine.CommandLineGATK;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.engine.refdata.utils.RODRecordList;
import org.broadinstitute.gatk.engine.walkers.NanoSchedulable;
import org.broadinstitute.gatk.engine.walkers.RodWalker;
import org.broadinstitute.gatk.engine.walkers.TreeReducible;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.Utils;
import org.broadinstitute.gatk.utils.collections.ExpandingArrayList;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
import org.broadinstitute.gatk.utils.help.HelpConstants;

import java.io.PrintStream;
import java.util.*;

/**
* Prints out counts of the number of reference ordered data objects encountered.
*
* <p>CountRods is a RODWalker, and so traverses the data by ROD. For example if the ROD passed to it is a VCF file,
* it will count the variants in the file.</p>
*
* <p>Note that this tool is different from CountRodsByRef which is a RefWalker, and so traverses the data by
* position along the reference. CountRodsByRef can count ROD elements (such as, but not limited to, variants) found
* at each position or within specific intervals if you use the -L argument (see CommandLineGATK).</p>
*
* <p>Both these tools are different from CountVariants in that they are more generic (they can also count RODs that
* are not variants) and CountVariants is more detailed, in that it computes additional statistics (type of variants
* being indels vs. SNPs etc). </p>
*
* <h3>Input</h3>
* <p>
* One or more rod files.
* </p>
*
* <h3>Output</h3>
* <p>
* Number of rods seen.
* </p>
*
* <h3>Example</h3>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
*   -T CountRODs \
*   -R ref.fasta \
*   -o output.txt \
*   --rod input.vcf
* </pre>
*
*/
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} )
public class CountRODs extends RodWalker<CountRODs.Datum, Pair<ExpandingArrayList<Long>, Long>> implements TreeReducible<Pair<ExpandingArrayList<Long>, Long>>, NanoSchedulable {
    @Output
    public PrintStream out;

    /**
     * One or more input rod files
     */
    @Input(fullName="rod", shortName = "rod", doc="Input VCF file(s)", required=true)
    public List<RodBinding<Feature>> rods = Collections.emptyList();

    @Argument(fullName = "verbose", shortName = "v", doc="If true, this tool will print out detailed information about the rods it finds and locations", required = false)
    public boolean verbose = false;

    @Argument(fullName = "showSkipped", shortName = "s", doc="If true, this tool will print out the skipped locations", required = false)
    public boolean showSkipped = false;

    @Override
    public Pair<ExpandingArrayList<Long>, Long> treeReduce(Pair<ExpandingArrayList<Long>, Long> lhs, Pair<ExpandingArrayList<Long>, Long> rhs) {
        ExpandingArrayList<Long> nt = new ExpandingArrayList<Long>();
        nt.addAll(lhs.first);
        int index = 0;
        for (Long l : rhs.first) {
            if (nt.get(index) == null)
                nt.add(l);
            else
                nt.set(index,nt.get(index) + l);
            index++;
        }
        return new Pair<ExpandingArrayList<Long>, Long>(nt, lhs.second + rhs.second);
    }

    public class Datum {
        public long nRodsAtThisLocation = 0;
        public long nSkippedBases =0;
        public long nTotalBases = 0;

        public Datum( long nRodsAtThisLocation, long nSkippedBases, long nTotalBases ) {
            this.nRodsAtThisLocation = nRodsAtThisLocation;
            this.nSkippedBases = nSkippedBases;
            this.nTotalBases = nTotalBases;
        }

        public String toString() {
            return String.format("<%d %d %d>", nRodsAtThisLocation, nSkippedBases, nTotalBases);
        }
    }

    public Datum map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        GenomeLoc cur = context.getLocation();

        if ( verbose && showSkipped ) {
            for(long i = context.getSkippedBases(); i >= 0; i--) {
                SAMSequenceDictionary dictionary = getToolkit().getReferenceDataSource().getReference().getSequenceDictionary();
                SAMSequenceRecord contig = dictionary.getSequence(cur.getContig());
                if(cur.getStop() < contig.getSequenceLength())
                    cur = getToolkit().getGenomeLocParser().incPos(cur,1);
                else
                    cur = getToolkit().getGenomeLocParser().createGenomeLoc(dictionary.getSequence(contig.getSequenceIndex()+1).getSequenceName(),1,1);
                out.printf("%s: skipped%n", cur);

            }
        }

        long nRodsHere = 0;
        long nTotalBases = 0;

        if ( ref == null ) {
            // we're getting the last skipped update
            if ( verbose )
                out.printf("Last position was %s: skipping %d bases%n",
                        context.getLocation(), context.getSkippedBases() );
            nRodsHere = -1; // don't update this
            nTotalBases = context.getSkippedBases();
        } else {
            Collection<RODRecordList> rods = new LinkedList<RODRecordList>();
            for ( RODRecordList rod : tracker.getBoundRodTracks() ) {
                //System.out.printf("Considering rod %s%n", rod);
                if ( rod.getLocation().getStart() == context.getLocation().getStart() && ! rod.getName().equals("interval") ) {
                    // only consider the first element
                    //System.out.printf("adding it%n");
                    rods.add(rod);
                }
            }

            nRodsHere = rods.size();

            if ( nRodsHere > 0 ) {
                if ( verbose ) {
                    List<String> names = new ArrayList<String>();
                    for ( RODRecordList rod : rods ) {
                        names.add(rod.getName());
                    }

                    //System.out.printf("context is %s", context.getSkippedBases());
                    out.printf("At %s: found %d rod(s) [%s] after skipping %d bases%n",
                            context.getLocation(), nRodsHere, Utils.join(",", names), context.getSkippedBases() );
                }
            }

            nTotalBases = context.getSkippedBases() + 1;
        }

        return new Datum(nRodsHere, context.getSkippedBases(), nTotalBases);
    }

    public Pair<ExpandingArrayList<Long>, Long> reduceInit() {
        return new Pair<ExpandingArrayList<Long>, Long>(new ExpandingArrayList<Long>(), 0l);
    }

    private void updateCounts(ExpandingArrayList<Long> counts, long nRods, long nObs) {
        if ( nRods >= 0 ) {
            long prev = counts.get((int)nRods) == null ? 0l : counts.get((int)nRods);
            counts.set((int)nRods, nObs + prev);
        }
    }

    public Pair<ExpandingArrayList<Long>, Long> reduce(Datum point, Pair<ExpandingArrayList<Long>, Long> sum) {
        ExpandingArrayList<Long> counts = sum.getFirst();
        updateCounts(counts, point.nRodsAtThisLocation, 1);
        updateCounts(counts, 0, point.nSkippedBases);

        Pair<ExpandingArrayList<Long>, Long> r = new Pair<ExpandingArrayList<Long>, Long>(counts, point.nTotalBases + sum.getSecond());

        //System.out.printf("Reduce: %s %s => %s%n", point, sum, r);
        return r;
    }
}
TOP

Related Classes of org.broadinstitute.gatk.tools.walkers.qc.CountRODs$Datum

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.