Package org.broadinstitute.gatk.engine.samples

Source Code of org.broadinstitute.gatk.engine.samples.SampleDB

/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package org.broadinstitute.gatk.engine.samples;

import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import org.broadinstitute.gatk.utils.exceptions.GATKException;
import htsjdk.variant.variantcontext.Genotype;

import java.util.*;

/**
*
*/
public class SampleDB {
    /**
     * This is where Sample objects are stored. Samples are usually accessed by their ID, which is unique, so
     * this is stored as a HashMap.
     */
    private final HashMap<String, Sample> samples = new HashMap<String, Sample>();

    /**
     * Constructor takes both a SAM header and sample files because the two must be integrated.
     */
    public SampleDB() {

    }

    /**
     * Protected function to add a single sample to the database
     *
     * @param sample to be added
     */
    protected SampleDB addSample(Sample sample) {
        Sample prev = samples.get(sample.getID());
        if ( prev != null )
            sample = Sample.mergeSamples(prev, sample);
        samples.put(sample.getID(), sample);
        return this;
    }

    // --------------------------------------------------------------------------------
    //
    // Functions for getting a sample from the DB
    //
    // --------------------------------------------------------------------------------

    /**
     * Get a sample by its ID
     * If an alias is passed in, return the main sample object
     * @param id
     * @return sample Object with this ID, or null if this does not exist
     */
    public Sample getSample(String id) {
        return samples.get(id);
    }

    /**
     *
     * @param read
     * @return sample Object with this ID, or null if this does not exist
     */
    public Sample getSample(final SAMRecord read) {
        return getSample(read.getReadGroup());
    }

    /**
     *
     * @param rg
     * @return sample Object with this ID, or null if this does not exist
     */
    public Sample getSample(final SAMReadGroupRecord rg) {
        return getSample(rg.getSample());
    }

    /**
     * @param g Genotype
     * @return sample Object with this ID, or null if this does not exist
     */
    public Sample getSample(final Genotype g) {
        return getSample(g.getSampleName());
    }

    // --------------------------------------------------------------------------------
    //
    // Functions for accessing samples in the DB
    //
    // --------------------------------------------------------------------------------

    /**
     * Get number of sample objects
     * @return size of samples map
     */
    public int sampleCount() {
        return samples.size();
    }

    public Set<Sample> getSamples() {
        return new LinkedHashSet<>(samples.values());
    }

    public Collection<String> getSampleNames() {
        return Collections.unmodifiableCollection(samples.keySet());
    }


    /**
     * Takes a collection of sample names and returns their corresponding sample objects
     * Note that, since a set is returned, if you pass in a list with duplicates names there will not be any duplicates in the returned set
     * @param sampleNameList Set of sample names
     * @return Corresponding set of samples
     */
    public Set<Sample> getSamples(Collection<String> sampleNameList) {
        HashSet<Sample> samples = new HashSet<Sample>();
        for (String name : sampleNameList) {
            try {
                samples.add(getSample(name));
            }
            catch (Exception e) {
                throw new GATKException("Could not get sample with the following ID: " + name, e);
            }
        }
        return samples;
    }

    // --------------------------------------------------------------------------------
    //
    // Higher level pedigree functions
    //
    // --------------------------------------------------------------------------------

    /**
     * Returns a sorted set of the family IDs in all samples (excluding null ids)
     * @return
     */
    public final Set<String> getFamilyIDs() {
        return getFamilies().keySet();
    }

    /**
     * Returns a map from family ID -> set of family members for all samples with
     * non-null family ids
     *
     * @return
     */
    public final Map<String, Set<Sample>> getFamilies() {
        return getFamilies(null);
    }

    /**
     * Returns a map from family ID -> set of family members for all samples in sampleIds with
     * non-null family ids
     *
     * @param sampleIds - all samples to include. If null is passed then all samples are returned.
     * @return
     */
    public final Map<String, Set<Sample>> getFamilies(Collection<String> sampleIds) {
        final Map<String, Set<Sample>> families = new TreeMap<String, Set<Sample>>();

        for ( final Sample sample : samples.values() ) {
            if(sampleIds == null || sampleIds.contains(sample.getID())){
                final String famID = sample.getFamilyID();
                if ( famID != null ) {
                    if ( ! families.containsKey(famID) )
                        families.put(famID, new TreeSet<Sample>());
                    families.get(famID).add(sample);
                }
            }
        }
        return families;
    }

    /**
     * Returns all the trios present in the sample database. The strictOneChild parameter determines
     * whether multiple children of the same parents resolve to multiple trios, or are excluded
     * @param strictOneChild - exclude pedigrees with >1 child for parental pair
     * @return - all of the mother+father=child triplets, subject to strictOneChild
     */
    public final Set<Trio> getTrios(boolean strictOneChild) {
        Set<Trio> trioSet = new HashSet<Trio>();
        for ( String familyString : getFamilyIDs() ) {
            Set<Sample> family = getFamily(familyString);
            for ( Sample sample : family) {
                if ( sample.getParents().size() == 2 ) {
                    Trio trio = new Trio(sample.getMother(),sample.getFather(),sample);
                    trioSet.add(trio);
                }
            }
        }

        if ( strictOneChild )
            trioSet = removeTriosWithSameParents(trioSet);

        return trioSet;
    }

    /**
     * Returns all the trios present in the db. See getTrios(boolean strictOneChild)
     * @return all the trios present in the samples db.
     */
    public final Set<Trio> getTrios() {
        return getTrios(false);
    }

    /**
     * Subsets a set of trios to only those with nonmatching founders. If two (or more) trio objects have
     * the same mother and father, then both (all) are removed from the returned set.
     * @param trios - a set of Trio objects
     * @return those subset of Trio objects in the input set with nonmatching founders
     */
    private Set<Trio> removeTriosWithSameParents(final Set<Trio> trios) {
        Set<Trio> filteredTrios = new HashSet<Trio>();
        filteredTrios.addAll(trios);
        Set<Trio> triosWithSameParents = new HashSet<Trio>();
        for ( Trio referenceTrio : filteredTrios ) {
            for ( Trio compareTrio : filteredTrios ) {
                if ( referenceTrio != compareTrio &&
                     referenceTrio.getFather().equals(compareTrio.getFather()) &&
                     referenceTrio.getMother().equals(compareTrio.getMother()) ) {
                    triosWithSameParents.add(referenceTrio);
                    triosWithSameParents.add(compareTrio);
                }
            }
        }
        filteredTrios.removeAll(triosWithSameParents);
        return filteredTrios;
    }

    /**
     * Returns the set of all children that have both of their parents.
     * Note that if a family is composed of more than 1 child, each child is
     * returned.
     * @return - all the children that have both of their parents
     * @deprecated - getTrios() replaces this function
     */
    @Deprecated
    public final Set<Sample> getChildrenWithParents(){
        return getChildrenWithParents(false);
    }

    /**
     * Returns the set of all children that have both of their parents.
     * Note that if triosOnly = false, a family is composed of more than 1 child, each child is
     * returned.
     *
     * This method can be used wherever trios are needed
     *
     * @param triosOnly - if set to true, only strict trios are returned
     * @return - all the children that have both of their parents
     * @deprecated - getTrios(boolean strict) replaces this function
     * @bug -- does not work for extracting multiple generations of trios, e.g.
     * ..........Mom1------Dad1
     * ................|
     * ..............Child1--------Mom2
     * .......................|
     * .....................Child2
     */
    @Deprecated
    public final Set<Sample> getChildrenWithParents(boolean triosOnly) {

        Map<String, Set<Sample>> families = getFamilies();
        final Set<Sample> childrenWithParents = new HashSet<Sample>();
        Iterator<Sample> sampleIterator;

        for ( Set<Sample> familyMembers: families.values() ) {
            if(triosOnly && familyMembers.size() != 3)
                continue;

            sampleIterator = familyMembers.iterator();
            Sample sample;
            while(sampleIterator.hasNext()){
                sample = sampleIterator.next();
                if(sample.getParents().size() == 2 && familyMembers.containsAll(sample.getParents()))
                    childrenWithParents.add(sample);
            }

        }
        return childrenWithParents;
    }

    /**
     * Return all samples with a given family ID
     * @param familyId
     * @return
     */
    public Set<Sample> getFamily(String familyId) {
        return getFamilies().get(familyId);
    }

    /**
     * Returns all children of a given sample
     * See note on the efficiency of getFamily() - since this depends on getFamily() it's also not efficient
     * @param sample
     * @return
     */
    public Set<Sample> getChildren(Sample sample) {
        final HashSet<Sample> children = new HashSet<Sample>();
        for ( final Sample familyMember : getFamily(sample.getFamilyID())) {
            if ( familyMember.getMother() == sample || familyMember.getFather() == sample ) {
                children.add(familyMember);
            }
        }
        return children;
    }

    public Set<String> getFounderIds(){
        Set<String> founders = new HashSet<String>();
        for(Sample sample : getSamples()){
            if(sample.getParents().size()<1)
                founders.add(sample.getID());

        }
        return founders;
    }
}
TOP

Related Classes of org.broadinstitute.gatk.engine.samples.SampleDB

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.