Package org.broadinstitute.gatk.engine.datasources.providers

Source Code of org.broadinstitute.gatk.engine.datasources.providers.ReferenceView

/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package org.broadinstitute.gatk.engine.datasources.providers;

import htsjdk.samtools.reference.IndexedFastaSequenceFile;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMSequenceRecord;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;

import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
/**
* User: hanna
* Date: May 22, 2009
* Time: 12:19:17 PM
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
* Software and documentation are copyright 2005 by the Broad Institute.
* All rights are reserved.
*
* Users acknowledge that this software is supplied without any warranty or support.
* The Broad Institute is not responsible for its use, misuse, or
* functionality.
*/

/**
* A view into the reference backing this shard.
*/
public class ReferenceView implements View {
    /**
     * The parser, used to create and parse GenomeLocs.
     */
    protected final GenomeLocParser genomeLocParser;

    /**
     * The source of reference data.
     */
    protected IndexedFastaSequenceFile reference = null;

    /**
     * Create a new ReferenceView.
     * @param provider
     */
    public ReferenceView( ShardDataProvider provider ) {
        this.genomeLocParser = provider.getGenomeLocParser();
        this.reference = provider.getReference();
    }

    /**
     * Reference views don't conflict with anything else.
     * @return Empty list.
     */
    public Collection<Class<? extends View>> getConflictingViews() { return Collections.emptyList(); }

    /**
     * Deinitialize pointers for fast fail.  Someone else will handle file management.
     */
    public void close() {
        reference = null;
    }

    /**
     * Allow the user to pull reference info from any arbitrary region of the reference.
     * If parts of the reference don't exist, mark them in the char array with 'X'es.
     * @param genomeLoc The locus.
     * @return A list of the bases starting at the start of the locus (inclusive) and ending
     *         at the end of the locus (inclusive).
     */
    final static int BUFFER = 10000;
    final static byte[] Xs = new byte[BUFFER];
    static {
        Arrays.fill(Xs, (byte)'X');
    }

    protected byte[] getReferenceBases( SAMRecord read ) {
        return getReferenceBases(genomeLocParser.createGenomeLoc(read));

    }

    protected byte[] getReferenceBases( GenomeLoc genomeLoc ) {
        SAMSequenceRecord sequenceInfo = reference.getSequenceDictionary().getSequence(genomeLoc.getContig());

        long start = genomeLoc.getStart();
        long stop = Math.min( genomeLoc.getStop(), sequenceInfo.getSequenceLength() );

        // Read with no aligned bases?  Return an empty array.
        if(stop - start + 1 == 0)
            return new byte[0];

        ReferenceSequence subsequence = reference.getSubsequenceAt(genomeLoc.getContig(), start, stop);

        int overhang = (int)(genomeLoc.getStop() - stop);
        if ( overhang > 0 ) {
            if ( overhang > BUFFER ) // todo -- this is a bit dangerous
                throw new ReviewedGATKException("Insufficient buffer size for Xs overhanging genome -- expand BUFFER");
            byte[] all = new byte[subsequence.getBases().length + overhang];
            System.arraycopy(subsequence.getBases(), 0, all, 0, subsequence.getBases().length);
            System.arraycopy(Xs, 0, all, subsequence.getBases().length, overhang);
            return all;
        } else {
            // fast path
            return subsequence.getBases();
        }
    }
}
TOP

Related Classes of org.broadinstitute.gatk.engine.datasources.providers.ReferenceView

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.