Package org.broadinstitute.gatk.tools.walkers.annotator

Source Code of org.broadinstitute.gatk.tools.walkers.annotator.VariantOverlapAnnotator

/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package org.broadinstitute.gatk.tools.walkers.annotator;

import org.broadinstitute.gatk.utils.commandline.RodBinding;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.vcf.VCFConstants;

import java.util.*;

/**
* Annotate the ID field and attribute overlap FLAGs for a VariantContext against a RefMetaDataTracker or a list
* of VariantContexts
*/
public final class VariantOverlapAnnotator {
    final RodBinding<VariantContext> dbSNPBinding;
    final Map<RodBinding<VariantContext>, String> overlapBindings;
    final GenomeLocParser genomeLocParser;

    /**
     * Create a new VariantOverlapAnnotator without overall bindings
     *
     * @see #VariantOverlapAnnotator(org.broadinstitute.gatk.utils.commandline.RodBinding, java.util.Map, org.broadinstitute.gatk.utils.GenomeLocParser)
     */
    public VariantOverlapAnnotator(RodBinding<VariantContext> dbSNPBinding, GenomeLocParser genomeLocParser) {
        this(dbSNPBinding, Collections.<RodBinding<VariantContext>, String>emptyMap(), genomeLocParser);
    }

    /**
     * Create a new VariantOverlapAnnotator
     *
     * @param dbSNPBinding the RodBinding to use for updating ID field values, or null if that behavior isn't desired
     * @param overlapBindings a map of RodBindings / name to use for overlap annotation.  Each binding will be used to
     *                        add name => true for variants that overlap with variants found to a
     *                        RefMetaDataTracker at each location.  Can be empty but not null
     * @param genomeLocParser the genome loc parser we'll use to create GenomeLocs for VariantContexts
     */
    public VariantOverlapAnnotator(RodBinding<VariantContext> dbSNPBinding, Map<RodBinding<VariantContext>, String> overlapBindings, GenomeLocParser genomeLocParser) {
        if ( overlapBindings == null ) throw new IllegalArgumentException("overlapBindings cannot be null");
        if ( genomeLocParser == null ) throw new IllegalArgumentException("genomeLocParser cannot be null");

        this.dbSNPBinding = dbSNPBinding;
        this.overlapBindings = overlapBindings;
        this.genomeLocParser = genomeLocParser;
    }

    /**
     * Update rsID in vcToAnnotate with rsIDs from dbSNPBinding fetched from tracker
     * @see #annotateOverlap(java.util.List, String, htsjdk.variant.variantcontext.VariantContext)
     *
     * @param tracker non-null tracker, which we will use to update the rsID of vcToAnnotate
     *                for VariantContexts bound to dbSNPBinding that start at vcToAnnotate
     * @param vcToAnnotate a variant context to annotate
     * @return a VariantContext (may be == to vcToAnnotate) with updated rsID value
     */
    public VariantContext annotateRsID(final RefMetaDataTracker tracker, final VariantContext vcToAnnotate) {
        if ( dbSNPBinding != null ) {
            final GenomeLoc loc = getLoc(vcToAnnotate);
            return annotateRsID(tracker.getValues(dbSNPBinding, loc), vcToAnnotate);
        } else {
            return vcToAnnotate;
        }
    }

    /**
     * Update rsID of vcToAnnotate with rsID match found in vcsAtLoc, if one exists
     *
     * @param vcsAtLoc a list of variant contexts starting at this location to use as sources for rsID values
     * @param vcToAnnotate a variant context to annotate
     * @return a VariantContext (may be == to vcToAnnotate) with updated rsID value
     */
    public VariantContext annotateRsID(final List<VariantContext> vcsAtLoc, final VariantContext vcToAnnotate ) {
        final String rsID = getRsID(vcsAtLoc, vcToAnnotate);

        // add the ID if appropriate
        if ( rsID != null ) {
            final VariantContextBuilder vcb = new VariantContextBuilder(vcToAnnotate);

            if ( ! vcToAnnotate.hasID() ) {
                return vcb.id(rsID).make();
            } else if ( ! vcToAnnotate.getID().contains(rsID) ) {
                return vcb.id(vcToAnnotate.getID() + VCFConstants.ID_FIELD_SEPARATOR + rsID).make();
            } // falling through to return VC lower down
        }

        // nothing to do, just return vc
        return vcToAnnotate;
    }

    private GenomeLoc getLoc(final VariantContext vc) {
        return genomeLocParser.createGenomeLoc(vc);
    }

    /**
     * Add overlap attributes to vcToAnnotate against all overlapBindings in tracker
     *
     * @see #annotateOverlap(java.util.List, String, htsjdk.variant.variantcontext.VariantContext)
     * for more information
     *
     * @param tracker non-null tracker, which we will use to update the rsID of vcToAnnotate
     *                for VariantContexts bound to dbSNPBinding that start at vcToAnnotate
     * @param vcToAnnotate a variant context to annotate
     * @return a VariantContext (may be == to vcToAnnotate) with updated overlaps update fields value
     */
    public VariantContext annotateOverlaps(final RefMetaDataTracker tracker, final VariantContext vcToAnnotate) {
        if ( overlapBindings.isEmpty() ) return vcToAnnotate;

        VariantContext annotated = vcToAnnotate;
        final GenomeLoc loc = getLoc(vcToAnnotate);
        for ( final Map.Entry<RodBinding<VariantContext>, String> overlapBinding : overlapBindings.entrySet() ) {
            annotated = annotateOverlap(tracker.getValues(overlapBinding.getKey(), loc), overlapBinding.getValue(), annotated);
        }

        return annotated;
    }

    /**
     * Add overlaps flag attributes to vcToAnnotate binding overlapTestVCs.getSource() => true if
     * an overlapping variant context can be found in overlapTestVCs with vcToAnnotate
     *
     * Overlaps here means that the reference alleles are the same and at least one alt
     * allele in vcToAnnotate is equals to one of the alt alleles in overlapTestVCs
     *
     * @param overlapTestVCs a non-null list of potential overlaps that start at vcToAnnotate
     * @param attributeKey the key to set to true in the attribute map for vcToAnnotate if it overlaps
     * @param vcToAnnotate a non-null VariantContext to annotate
     * @return
     */
    public VariantContext annotateOverlap(final List<VariantContext> overlapTestVCs, final String attributeKey, VariantContext vcToAnnotate) {
        if ( overlapBindings.isEmpty() ) return vcToAnnotate;

        final boolean overlaps = overlaps(overlapTestVCs, vcToAnnotate);
        if ( overlaps ) {
            return new VariantContextBuilder(vcToAnnotate).attribute(attributeKey, true).make();
        } else {
            return vcToAnnotate;
        }
    }

    /**
     * Returns the ID field of the first VariantContext in rsIDSourceVCs that has the same reference allele
     * as vcToAnnotate and all of the alternative alleles in vcToAnnotate.
     *
     * Doesn't require vcToAnnotate to be a complete match, so
     *
     * A/C/G in VC in rsIDSourceVCs
     *
     * would match the a VC with A/C but not A/T.  Also we don't require all alleles to match
     * so we would also match A/C/T to A/C/G.
     *
     * Will only match rsIDSourceVCs that aren't failing filters.
     *
     * @param rsIDSourceVCs a non-null list of potential overlaps that start at vcToAnnotate
     * @param vcToAnnotate a non-null VariantContext to annotate
     * @return a String to use for the rsID from rsIDSourceVCs if one matches, or null if none matches
     */
    private String getRsID(final List<VariantContext> rsIDSourceVCs, final VariantContext vcToAnnotate) {
        if ( rsIDSourceVCs == null ) throw new IllegalArgumentException("rsIDSourceVCs cannot be null");
        if ( vcToAnnotate == null ) throw new IllegalArgumentException("vcToAnnotate cannot be null");

        for ( final VariantContext vcComp : rsIDSourceVCs ) {
            if ( vcComp.isFiltered() ) continue; // don't process any failed VCs

            if ( ! vcComp.getChr().equals(vcToAnnotate.getChr()) || vcComp.getStart() != vcToAnnotate.getStart() )
                throw new IllegalArgumentException("source rsID VariantContext " + vcComp + " doesn't start at the same position as vcToAnnotate " + vcToAnnotate);

            if ( vcToAnnotate.getReference().equals(vcComp.getReference()) ) {
                for ( final Allele allele : vcToAnnotate.getAlternateAlleles() ) {
                    if ( vcComp.getAlternateAlleles().contains(allele) )
                        return vcComp.getID();
                }
            }
        }

        return null;
    }

    /**
     * Does vcToAnnotate overlap with any of the records in potentialOverlaps?
     *
     * @param potentialOverlaps a non-null list of potential overlaps that start at vcToAnnotate
     * @param vcToAnnotate a non-null VariantContext to annotate
     * @return true if vcToAnnotate overlaps (position and all alt alleles) with some variant in potentialOverlaps
     */
    private boolean overlaps(final List<VariantContext> potentialOverlaps, final VariantContext vcToAnnotate) {
        return getRsID(potentialOverlaps, vcToAnnotate) != null;
    }

    /**
     * Get the collection of the RodBinding names for those being used for overlap detection
     * @return a non-null collection of Strings
     */
    public Collection<String> getOverlapNames() {
        return overlapBindings.values();
    }
}
TOP

Related Classes of org.broadinstitute.gatk.tools.walkers.annotator.VariantOverlapAnnotator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.