Package org.broadinstitute.gatk.engine.alignment.bwa

Source Code of org.broadinstitute.gatk.engine.alignment.bwa.BWTFiles

/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package org.broadinstitute.gatk.engine.alignment.bwa;

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.util.StringUtil;
import org.broadinstitute.gatk.engine.alignment.reference.bwt.*;
import org.broadinstitute.gatk.engine.alignment.reference.packing.PackUtils;
import org.broadinstitute.gatk.utils.Utils;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;

import java.io.File;
import java.io.IOException;

/**
* Support files for BWT.
*
* @author mhanna
* @version 0.1
*/
public class BWTFiles {
    /**
     * ANN (?) file name.
     */
    public final File annFile;

    /**
     * AMB (?) file name.
     */
    public final File ambFile;

    /**
     * Packed reference sequence file.
     */
    public final File pacFile;

    /**
     * Reverse of packed reference sequence file.
     */
    public final File rpacFile;

    /**
     * Forward BWT file.
     */
    public final File forwardBWTFile;

    /**
     * Forward suffix array file.
     */
    public final File forwardSAFile;

    /**
     * Reverse BWT file.
     */
    public final File reverseBWTFile;

    /**
     * Reverse suffix array file.
     */
    public final File reverseSAFile;

    /**
     * Where these files autogenerated on the fly?
     */
    public final boolean autogenerated;

    /**
     * Create a new BWA configuration file using the given prefix.
     * @param prefix Prefix to use when creating the configuration.  Must not be null.
     */
    public BWTFiles(String prefix) {
        if(prefix == null)
            throw new ReviewedGATKException("Prefix must not be null.");
        annFile = new File(prefix + ".ann");
        ambFile = new File(prefix + ".amb");
        pacFile = new File(prefix + ".pac");
        rpacFile = new File(prefix + ".rpac");
        forwardBWTFile = new File(prefix + ".bwt");
        forwardSAFile = new File(prefix + ".sa");
        reverseBWTFile = new File(prefix + ".rbwt");
        reverseSAFile = new File(prefix + ".rsa");
        autogenerated = false;
    }

    /**
     * Hand-create a new BWTFiles object, specifying a unique file object for each type.
     * @param annFile ANN (alternate dictionary) file.
     * @param ambFile AMB (holes) files.
     * @param pacFile Packed representation of the forward reference sequence.
     * @param forwardBWTFile BWT representation of the forward reference sequence.
     * @param forwardSAFile SA representation of the forward reference sequence.
     * @param rpacFile Packed representation of the reversed reference sequence.
     * @param reverseBWTFile BWT representation of the reversed reference sequence.
     * @param reverseSAFile SA representation of the reversed reference sequence.
     */
    private BWTFiles(File annFile,
                     File ambFile,
                     File pacFile,
                     File forwardBWTFile,
                     File forwardSAFile,
                     File rpacFile,
                     File reverseBWTFile,
                     File reverseSAFile) {
        this.annFile = annFile;
        this.ambFile = ambFile;
        this.pacFile = pacFile;
        this.forwardBWTFile = forwardBWTFile;
        this.forwardSAFile = forwardSAFile;
        this.rpacFile = rpacFile;
        this.reverseBWTFile = reverseBWTFile;
        this.reverseSAFile = reverseSAFile;       
        autogenerated = true;
    }

    /**
     * Close out this files object, in the process deleting any temporary filse
     * that were created.
     */
    public void close() {
        if(autogenerated) {
            boolean success = true;
            success = annFile.delete();
            success &= ambFile.delete();
            success &= pacFile.delete();
            success &= forwardBWTFile.delete();
            success &= forwardSAFile.delete();
            success &= rpacFile.delete();
            success &= reverseBWTFile.delete();
            success &= reverseSAFile.delete();

            if(!success)
                throw new ReviewedGATKException("Unable to clean up autogenerated representation");
        }
    }

    /**
     * Create a new set of BWT files from the given reference sequence.
     * @param referenceSequence Sequence from which to build metadata.
     * @return A new object representing encoded representations of each sequence.
     */
    public static BWTFiles createFromReferenceSequence(byte[] referenceSequence) {
        byte[] normalizedReferenceSequence = new byte[referenceSequence.length];
        System.arraycopy(referenceSequence,0,normalizedReferenceSequence,0,referenceSequence.length);
        normalizeReferenceSequence(normalizedReferenceSequence);       

        File annFile,ambFile,pacFile,bwtFile,saFile,rpacFile,rbwtFile,rsaFile;
        try {
            // Write the ann and amb for this reference sequence.
            annFile = File.createTempFile("bwt",".ann");
            ambFile = File.createTempFile("bwt",".amb");

            SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
            dictionary.addSequence(new SAMSequenceRecord("autogenerated",normalizedReferenceSequence.length));

            ANNWriter annWriter = new ANNWriter(annFile);
            annWriter.write(dictionary);
            annWriter.close();

            AMBWriter ambWriter = new AMBWriter(ambFile);
            ambWriter.writeEmpty(dictionary);
            ambWriter.close();

            // Write the encoded files for the forward version of this reference sequence.
            pacFile = File.createTempFile("bwt",".pac");
            bwtFile = File.createTempFile("bwt",".bwt");
            saFile = File.createTempFile("bwt",".sa");

            writeEncodedReferenceSequence(normalizedReferenceSequence,pacFile,bwtFile,saFile);

            // Write the encoded files for the reverse version of this reference sequence.
            byte[] reverseReferenceSequence = Utils.reverse(normalizedReferenceSequence);

            rpacFile = File.createTempFile("bwt",".rpac");
            rbwtFile = File.createTempFile("bwt",".rbwt");
            rsaFile = File.createTempFile("bwt",".rsa");

            writeEncodedReferenceSequence(reverseReferenceSequence,rpacFile,rbwtFile,rsaFile);
        }
        catch(IOException ex) {
            throw new ReviewedGATKException("Unable to write autogenerated reference sequence to temporary files");
        }

        // Make sure that, at the very least, all temporary files are deleted on exit.
        annFile.deleteOnExit();
        ambFile.deleteOnExit();
        pacFile.deleteOnExit();
        bwtFile.deleteOnExit();
        saFile.deleteOnExit();
        rpacFile.deleteOnExit();
        rbwtFile.deleteOnExit();
        rsaFile.deleteOnExit();

        return new BWTFiles(annFile,ambFile,pacFile,bwtFile,saFile,rpacFile,rbwtFile,rsaFile);
    }

    /**
     * Write the encoded form of the reference sequence.  In the case of BWA, the encoded reference
     * sequence is the reference itself in PAC format, the BWT, and the suffix array.
     * @param referenceSequence The reference sequence to encode.
     * @param pacFile Target for the PAC-encoded reference.
     * @param bwtFile Target for the BWT representation of the reference.
     * @param suffixArrayFile Target for the suffix array encoding of the reference.
     * @throws java.io.IOException In case of issues writing to the file.
     */
    private static void writeEncodedReferenceSequence(byte[] referenceSequence,
                                               File pacFile,
                                               File bwtFile,
                                               File suffixArrayFile) throws IOException {
        PackUtils.writeReferenceSequence(pacFile,referenceSequence);

        BWT bwt = BWT.createFromReferenceSequence(referenceSequence);
        BWTWriter bwtWriter = new BWTWriter(bwtFile);
        bwtWriter.write(bwt);
        bwtWriter.close();

        SuffixArray suffixArray = SuffixArray.createFromReferenceSequence(referenceSequence);
        SuffixArrayWriter suffixArrayWriter = new SuffixArrayWriter(suffixArrayFile);
        suffixArrayWriter.write(suffixArray);
        suffixArrayWriter.close();
    }

    /**
     * Convert the given reference sequence into a form suitable for building into
     * on-the-fly sequences.
     * @param referenceSequence The reference sequence to normalize.
     * @throws org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException if normalized sequence cannot be generated.
     */
    private static void normalizeReferenceSequence(byte[] referenceSequence) {
        StringUtil.toUpperCase(referenceSequence);
        for(byte base: referenceSequence) {
            if(base != 'A' && base != 'C' && base != 'G' && base != 'T')
                throw new ReviewedGATKException(String.format("Base type %c is not supported when building references on-the-fly",(char)base));
        }
    }
}
TOP

Related Classes of org.broadinstitute.gatk.engine.alignment.bwa.BWTFiles

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.