Package picard.sam

Source Code of picard.sam.SamAlignmentMerger$SeparateEndAlignmentIterator

package picard.sam;

import htsjdk.samtools.BAMRecordCodec;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.MergingSamRecordIterator;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileHeader.SortOrder;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SAMProgramRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordQueryNameComparator;
import htsjdk.samtools.SamFileHeaderMerger;
import htsjdk.samtools.SamPairUtil;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.DelegatingIterator;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.PeekableIterator;
import htsjdk.samtools.util.SortingCollection;
import picard.PicardException;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

/**
* Class that takes in a set of alignment information in SAM format and merges it with the set
* of all reads for which alignment was attempted, stored in an unmapped SAM file.  This
* class overrides mergeAlignment in AbstractAlignmentNMerger and proceeds on the assumption that
* the underlying alignment records are aleady in query-name sorted order (true for bwa).  If
* they are not, the mergeAlignment method catches the IllegalStateException, forces a sort
* of the underlying alignment records, and tries again.
*
* @author ktibbett@broadinstitute.org
*/
public class SamAlignmentMerger extends AbstractAlignmentMerger {

    private final Log log = Log.getInstance(SamAlignmentMerger.class);
    private final List<File> alignedSamFile;
    private final List<File> read1AlignedSamFile;
    private final List<File> read2AlignedSamFile;
    private final int maxGaps;
    private boolean forceSort = false;

    /**
     * Constructor
     *
     * @param unmappedBamFile   The BAM file that was used as the input to the aligner, which will
     *                          include info on all the reads that did not map.  Required.
     * @param targetBamFile     The file to which to write the merged SAM records. Required.
     * @param referenceFasta    The reference sequence for the map files. Required.
     * @param programRecord     Program record for taget file SAMRecords created.
     * @param clipAdapters      Whether adapters marked in unmapped BAM file should be marked as
*                          soft clipped in the merged bam. Required.
     * @param bisulfiteSequence Whether the reads are bisulfite sequence (used when calculating the
*                          NM and UQ tags). Required.
     * @param alignedReadsOnly  Whether to output only those reads that have alignment data
     * @param alignedSamFile      The SAM file(s) with alignment information.  Optional.  If this is
*                            not provided, then read1AlignedSamFile and read2AlignedSamFile must be.
     * @param maxGaps             The maximum number of insertions or deletions permitted in an
*                            alignment.  Alignments with more than this many gaps will be ignored.
*                            -1 means to allow any number of gaps.
     * @param attributesToRetain  attributes from the alignment record that should be
     *                          removed when merging.  This overrides attributesToRetain if they share
     *                           common tags.
     * @param read1BasesTrimmed The number of bases trimmed from start of read 1 prior to alignment.  Optional.
     * @param read2BasesTrimmed The number of bases trimmed from start of read 2 prior to alignment.  Optional.
     * @param read1AlignedSamFile The alignment records for read1.  Used when the two ends of a read are
*                            aligned separately.  This is optional, but must be specified if
*                            alignedSamFile is not.
     * @param read2AlignedSamFile The alignment records for read1.  Used when the two ends of a read are
*                            aligned separately.  This is optional, but must be specified if
*                            alignedSamFile is not.
     * @param expectedOrientations A List of SamPairUtil.PairOrientations that are expected for
*                          aligned pairs.  Used to determine the properPair flag.
     * @param sortOrder           The order in which the merged records should be output.  If null,
*                            output will be coordinate-sorted
     * @param primaryAlignmentSelectionStrategy How to handle multiple alignments for a fragment or read pair,
*                                          in which none are primary, or more than one is marked primary
     * @param addMateCigar      True if we are to add or maintain the mate CIGAR (MC) tag, false if we are to remove or not include.
     */
    public SamAlignmentMerger(final File unmappedBamFile, final File targetBamFile, final File referenceFasta,
                              final SAMProgramRecord programRecord, final boolean clipAdapters, final boolean bisulfiteSequence,
                              final boolean alignedReadsOnly,
                              final List<File> alignedSamFile, final int maxGaps, final List<String> attributesToRetain,
                              final List<String> attributesToRemove,
                              final Integer read1BasesTrimmed, final Integer read2BasesTrimmed,
                              final List<File> read1AlignedSamFile, final List<File> read2AlignedSamFile,
                              final List<SamPairUtil.PairOrientation> expectedOrientations,
                              final SortOrder sortOrder,
                              final PrimaryAlignmentSelectionStrategy primaryAlignmentSelectionStrategy,
                              final boolean addMateCigar) {

        super(unmappedBamFile, targetBamFile, referenceFasta, clipAdapters, bisulfiteSequence,
              alignedReadsOnly, programRecord, attributesToRetain, attributesToRemove, read1BasesTrimmed,
              read2BasesTrimmed, expectedOrientations, sortOrder, primaryAlignmentSelectionStrategy, addMateCigar);

        if ((alignedSamFile == null || alignedSamFile.size() == 0) &&
            (read1AlignedSamFile == null || read1AlignedSamFile.size() == 0 ||
             read2AlignedSamFile == null || read2AlignedSamFile.size() == 0)) {
            throw new IllegalArgumentException("Either alignedSamFile or BOTH of read1AlignedSamFile and " +
                    "read2AlignedSamFile must be specified.");
        }

        if (alignedSamFile != null) {
            for (final File f : alignedSamFile) {
                IOUtil.assertFileIsReadable(f);
            }
        } else {
            for (final File f : read1AlignedSamFile) {
                IOUtil.assertFileIsReadable(f);
            }
            for (final File f : read2AlignedSamFile) {
                IOUtil.assertFileIsReadable(f);
            }
        }

        this.alignedSamFile = alignedSamFile;
        this.read1AlignedSamFile = read1AlignedSamFile;
        this.read2AlignedSamFile = read2AlignedSamFile;
        this.maxGaps = maxGaps;
        if (programRecord == null) {
            final File tmpFile = this.alignedSamFile != null && this.alignedSamFile.size() > 0
                    ? this.alignedSamFile.get(0)
                    : this.read1AlignedSamFile.get(0);
            final SAMFileReader tmpReader = new SAMFileReader(tmpFile);
            tmpReader.setValidationStringency(ValidationStringency.SILENT);
            if (tmpReader.getFileHeader().getProgramRecords().size() == 1) {
                setProgramRecord(tmpReader.getFileHeader().getProgramRecords().get(0));
            }
            tmpReader.close();
        }

        log.info("Processing SAM file(s): " + alignedSamFile != null ? alignedSamFile : read1AlignedSamFile + "," + read2AlignedSamFile);
    }



    /**
     * Merges the alignment from the map file with the non-aligned records from the source BAM file.
     * Overrides mergeAlignment in AbstractAlignmentMerger.  Tries first to proceed on the assumption
     * that the alignment records are pre-sorted.  If not, catches the exception, forces a sort, and
     * tries again.
     */
    public void mergeAlignment() {
        try {
            super.mergeAlignment();
        }
        catch(final IllegalStateException ise) {
            log.warn("Exception merging bam alignment - attempting to sort aligned reads and try again: ", ise.getMessage());
            forceSort = true;
            resetRefSeqFileWalker();
            super.mergeAlignment();
        }
    }
    /**
     * Reads the aligned SAM records into a SortingCollection and returns an iterator over that collection
     */
    protected CloseableIterator<SAMRecord> getQuerynameSortedAlignedRecords() {

        final CloseableIterator<SAMRecord> mergingIterator;
        final SAMFileHeader header;

        // When the alignment records, including both ends of a pair, are in SAM files
        if (alignedSamFile != null && alignedSamFile.size() > 0) {
            final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(alignedSamFile.size());
            final List<SAMFileReader> readers = new ArrayList<SAMFileReader>(alignedSamFile.size());
            for (final File f : this.alignedSamFile) {
                final SAMFileReader r = new SAMFileReader(f);
                headers.add(r.getFileHeader());
                readers.add(r);
            }

            final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SortOrder.queryname, headers, false);

            mergingIterator = new MergingSamRecordIterator(headerMerger, readers, true);
            header = headerMerger.getMergedHeader();

        }
        // When the ends are aligned separately and don't have firstOfPair information correctly
        // set we use this branch.
        else {
            mergingIterator = new SeparateEndAlignmentIterator(this.read1AlignedSamFile, this.read2AlignedSamFile);
            header = ((SeparateEndAlignmentIterator)mergingIterator).getHeader();
        }


        if (!forceSort) {
            return mergingIterator;
        }


        final SortingCollection<SAMRecord> alignmentSorter = SortingCollection.newInstance(SAMRecord.class,
                    new BAMRecordCodec(header), new SAMRecordQueryNameComparator(), MAX_RECORDS_IN_RAM);

        int count = 0;
        while (mergingIterator.hasNext()) {
            alignmentSorter.add(mergingIterator.next());
            count++;
            if (count > 0 && count % 1000000 == 0) {
                log.info("Read " + count + " records from alignment SAM/BAM.");
            }
        }
        log.info("Finished reading " + count + " total records from alignment SAM/BAM.");

        mergingIterator.close();
        return new DelegatingIterator<SAMRecord>(alignmentSorter.iterator()) {
            @Override
            public void close() {
                super.close();
                alignmentSorter.cleanup();
            }
        };
    }

    private class SuffixTrimingSamRecordIterator implements CloseableIterator<SAMRecord> {
        private final CloseableIterator<SAMRecord> underlyingIterator;
        private final String suffixToTrim;

        private SuffixTrimingSamRecordIterator(final CloseableIterator<SAMRecord> underlyingIterator, final String suffixToTrim) {
            this.underlyingIterator = underlyingIterator;
            this.suffixToTrim = suffixToTrim;
        }

        @Override
        public void close() {
            underlyingIterator.close();
        }

        @Override
        public boolean hasNext() {
            return underlyingIterator.hasNext();
        }

        @Override
        public SAMRecord next() {
            final SAMRecord rec = underlyingIterator.next();
            final String readName = rec.getReadName();
            if (readName.endsWith(suffixToTrim)) {
                rec.setReadName(readName.substring(0, readName.length() - suffixToTrim.length()));
            }
            return rec;
        }

        @Override
        public void remove() {
            underlyingIterator.remove();
        }
    }

    private class SeparateEndAlignmentIterator implements CloseableIterator<SAMRecord> {

        private final PeekableIterator<SAMRecord> read1Iterator;
        private final PeekableIterator<SAMRecord> read2Iterator;
        private final SAMFileHeader header;

        public SeparateEndAlignmentIterator(final List<File> read1Alignments, final List<File> read2Alignments) {
            final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>();
            final List<SAMFileReader> read1 = new ArrayList<SAMFileReader>(read1Alignments.size());
            final List<SAMFileReader> read2 = new ArrayList<SAMFileReader>(read2Alignments.size());
            for (final File f : read1Alignments) {
                final SAMFileReader r = new SAMFileReader(f);
                headers.add(r.getFileHeader());
                read1.add(r);
            }
            for (final File f : read2Alignments) {
                final SAMFileReader r = new SAMFileReader(f);
                headers.add(r.getFileHeader());
                read2.add(r);
            }

            final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, headers, false);
            read1Iterator = new PeekableIterator<SAMRecord>(
                    new SuffixTrimingSamRecordIterator(new MergingSamRecordIterator(headerMerger, read1, true), "/1"));
            read2Iterator = new PeekableIterator<SAMRecord>(
                    new SuffixTrimingSamRecordIterator(new MergingSamRecordIterator(headerMerger, read2, true), "/2"));

            header = headerMerger.getMergedHeader();
        }

        public void close() {
            read1Iterator.close();
            read2Iterator.close();
        }

        public boolean hasNext() {
            return read1Iterator.hasNext() || read2Iterator.hasNext();
        }

        public SAMRecord next() {
            if (read1Iterator.hasNext()) {
                if (read2Iterator.hasNext()) {
                    return (read1Iterator.peek().getReadName().compareTo(read2Iterator.peek().getReadName()) <= 0)
                        ? setPairFlags(read1Iterator.next(), true)
                        : setPairFlags(read2Iterator.next(), false);
                }
                else {
                    return setPairFlags(read1Iterator.next(), true);
                }
            }
            else {
                return setPairFlags(read2Iterator.next(), false);
            }
        }

        public void remove() {
            throw new UnsupportedOperationException("remove() not supported");
        }

        public SAMFileHeader getHeader() { return this.header; }

        private SAMRecord setPairFlags(final SAMRecord sam, final boolean firstOfPair) {
            sam.setReadPairedFlag(true);
            sam.setFirstOfPairFlag(firstOfPair);
            sam.setSecondOfPairFlag(!firstOfPair);
            return sam;
        }
    }

    /**
     * For now, we only ignore those alignments that have more than <code>maxGaps</code> insertions
     * or deletions.
     */
    protected boolean ignoreAlignment(final SAMRecord sam) {
        if (maxGaps == -1) return false;
        int gaps = 0;
        for (final CigarElement el : sam.getCigar().getCigarElements()) {
            if (el.getOperator() == CigarOperator.I || el.getOperator() == CigarOperator.D ) {
                gaps++;
            }
        }
        return gaps > maxGaps;
    }

    // Accessor for testing
    public boolean getForceSort() {return this.forceSort; }
}
TOP

Related Classes of picard.sam.SamAlignmentMerger$SeparateEndAlignmentIterator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.