Package picard.sam

Source Code of picard.sam.AbstractAlignmentMerger

/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.sam;

import htsjdk.samtools.BAMRecordCodec;
import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.ReservedTagConstants;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileHeader.SortOrder;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMProgramRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordCoordinateComparator;
import htsjdk.samtools.SAMRecordQueryNameComparator;
import htsjdk.samtools.SAMRecordUtil;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.SAMTag;
import htsjdk.samtools.SAMUtils;
import htsjdk.samtools.SamPairUtil;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.filter.FilteringIterator;
import htsjdk.samtools.filter.SamRecordFilter;
import htsjdk.samtools.reference.ReferenceSequenceFileWalker;
import htsjdk.samtools.util.CigarUtil;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.samtools.util.SortingCollection;
import picard.PicardException;

import java.io.File;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.List;

/**
* Abstract class that coordinates the general task of taking in a set of alignment information,
* possibly in SAM format, possibly in other formats, and merging that with the set of all reads
* for which alignment was attempted, stored in an unmapped SAM file.
*
* The order of processing is as follows:
*
*   1.  Get records from the unmapped bam and the alignment data
*   2.  Merge the alignment information and public tags ONLY from the aligned SAMRecords
*   3.  Do additional modifications -- handle clipping, trimming, etc.
*   4.  Fix up mate information on paired reads
*   5.  Do a final calculation of the NM and UQ tags.
*   6.  Write the records to the output file.
*
* Concrete subclasses which extend AbstractAlignmentMerger should implement getQueryNameSortedAlignedRecords.
* If these records are not in queryname order, mergeAlignment will throw an IllegalStateException.
*
* Subclasses may optionally implement ignoreAlignment(), which can be used to skip over certain alignments.
*
*
* @author ktibbett@broadinstitute.org
*/
public abstract class AbstractAlignmentMerger {

    public static final int MAX_RECORDS_IN_RAM = 500000;

    private static final char[] RESERVED_ATTRIBUTE_STARTS = {'X','Y', 'Z'};

    private final Log log = Log.getInstance(AbstractAlignmentMerger.class);
    private final ProgressLogger progress = new ProgressLogger(this.log, 1000000, "Written to sorting collection in queryname order", "records");

    private final File unmappedBamFile;
    private final File targetBamFile;
    private final SAMSequenceDictionary sequenceDictionary;
    private ReferenceSequenceFileWalker refSeq = null;
    private final boolean clipAdapters;
    private final boolean bisulfiteSequence;
    private SAMProgramRecord programRecord;
    private final boolean alignedReadsOnly;
    private final SAMFileHeader header;
    private final List<String> attributesToRetain = new ArrayList<String>();
    private final List<String> attributesToRemove = new ArrayList<String>();
    private final File referenceFasta;
    private final Integer read1BasesTrimmed;
    private final Integer read2BasesTrimmed;
    private final List<SamPairUtil.PairOrientation> expectedOrientations;
    private final SortOrder sortOrder;
    private MultiHitAlignedReadIterator alignedIterator = null;
    private boolean clipOverlappingReads = true;
    private int maxRecordsInRam = MAX_RECORDS_IN_RAM;
    private final PrimaryAlignmentSelectionStrategy primaryAlignmentSelectionStrategy;
    private boolean keepAlignerProperPairFlags = false;
    private boolean addMateCigar = false;

    private final SamRecordFilter alignmentFilter = new SamRecordFilter() {
        public boolean filterOut(final SAMRecord record) {
            return ignoreAlignment(record);
        }
        public boolean filterOut(final SAMRecord first, final SAMRecord second) {
            throw new UnsupportedOperationException("Paired SamRecordFilter not implemented!");
        }
    };
    private boolean includeSecondaryAlignments = true;

    protected abstract CloseableIterator<SAMRecord> getQuerynameSortedAlignedRecords();
   
    protected boolean ignoreAlignment(final SAMRecord sam) { return false; } // default implementation

    /**
     * Constructor
     *
     * @param unmappedBamFile   The BAM file that was used as the input to the aligner, which will
     *                          include info on all the reads that did not map.  Required.
     * @param targetBamFile     The file to which to write the merged SAM records. Required.
     * @param referenceFasta    The reference sequence for the map files. Required.
     * @param clipAdapters      Whether adapters marked in unmapped BAM file should be marked as
     *                          soft clipped in the merged bam. Required.
     * @param bisulfiteSequence Whether the reads are bisulfite sequence (used when calculating the
     *                          NM and UQ tags). Required.
     * @param alignedReadsOnly  Whether to output only those reads that have alignment data
     * @param programRecord     Program record for target file SAMRecords created.
     * @param attributesToRetain  private attributes from the alignment record that should be
     *                          included when merging.  This overrides the exclusion of
     *                          attributes whose tags start with the reserved characters
     *                          of X, Y, and Z
     * @param attributesToRemove  attributes from the alignment record that should be
     *                          removed when merging.  This overrides attributesToRetain if they share
     *                           common tags.
     * @param read1BasesTrimmed The number of bases trimmed from start of read 1 prior to alignment.  Optional.
     * @param read2BasesTrimmed The number of bases trimmed from start of read 2 prior to alignment.  Optional.
     * @param expectedOrientations A List of SamPairUtil.PairOrientations that are expected for
     *                          aligned pairs.  Used to determine the properPair flag.
     * @param sortOrder           The order in which the merged records should be output.  If null,
     *                            output will be coordinate-sorted
     * @param primaryAlignmentSelectionStrategy What to do when there are multiple primary alignments, or multiple
     *                                          alignments but none primary, for a read or read pair.
     * @param addMateCigar      True if we are to add or maintain the mate CIGAR (MC) tag, false if we are to remove or not include.
     */
    public AbstractAlignmentMerger(final File unmappedBamFile, final File targetBamFile,
                                   final File referenceFasta, final boolean clipAdapters,
                                   final boolean bisulfiteSequence, final boolean alignedReadsOnly,
                                   final SAMProgramRecord programRecord, final List<String> attributesToRetain,
                                   final List<String> attributesToRemove,
                                   final Integer read1BasesTrimmed, final Integer read2BasesTrimmed,
                                   final List<SamPairUtil.PairOrientation> expectedOrientations,
                                   final SAMFileHeader.SortOrder sortOrder,
                                   final PrimaryAlignmentSelectionStrategy primaryAlignmentSelectionStrategy,
                                   final boolean addMateCigar) {
        IOUtil.assertFileIsReadable(unmappedBamFile);
        IOUtil.assertFileIsWritable(targetBamFile);
        IOUtil.assertFileIsReadable(referenceFasta);

        this.unmappedBamFile = unmappedBamFile;
        this.targetBamFile = targetBamFile;
        this.referenceFasta = referenceFasta;

        this.refSeq = new ReferenceSequenceFileWalker(referenceFasta);
        this.sequenceDictionary = refSeq.getSequenceDictionary();
        if (this.sequenceDictionary == null) {
            throw new PicardException("No sequence dictionary found for " + referenceFasta.getAbsolutePath() +
                    ".  Use CreateSequenceDictionary.jar to create a sequence dictionary.");
        }

        this.clipAdapters = clipAdapters;
        this.bisulfiteSequence = bisulfiteSequence;
        this.alignedReadsOnly = alignedReadsOnly;

        this.header = new SAMFileHeader();
        this.sortOrder = sortOrder != null ? sortOrder : SortOrder.coordinate;
        header.setSortOrder(SortOrder.coordinate);
        if (programRecord != null) {
            setProgramRecord(programRecord);
        }
        header.setSequenceDictionary(this.sequenceDictionary);
        if (attributesToRetain != null) {
            this.attributesToRetain.addAll(attributesToRetain);
        }
        if (attributesToRemove != null) {
            this.attributesToRemove.addAll(attributesToRemove);
            // attributesToRemove overrides attributesToRetain
            if (!this.attributesToRetain.isEmpty()) {
                for (String attribute : this.attributesToRemove) {
                    if (this.attributesToRetain.contains(attribute)) {
                        log.info("Overriding retaining the " + attribute + " tag since remove overrides retain.");
                        this.attributesToRetain.remove(attribute);
                    }
                }
            }
        }
        this.read1BasesTrimmed = read1BasesTrimmed;
        this.read2BasesTrimmed = read2BasesTrimmed;
        this.expectedOrientations = expectedOrientations;

        this.primaryAlignmentSelectionStrategy = primaryAlignmentSelectionStrategy;

        this.addMateCigar = addMateCigar;
    }

    /** Allows the caller to override the maximum records in RAM. */
    public void setMaxRecordsInRam(final int maxRecordsInRam) {
        this.maxRecordsInRam = maxRecordsInRam;
    }

    /**
     * Do this unconditionally, not just for aligned records, for two reasons:
     * - An unaligned read has been processed by the aligner, so it is more truthful.
     * - When chaining additional PG records, having all the records in the output file refer to the same PG
     *   record means that only one chain will need to be created, rather than a chain for the mapped reads
     *   and a separate chain for the unmapped reads.
     */
    private void maybeSetPgTag(final SAMRecord rec) {
        if (this.programRecord != null) {
            rec.setAttribute(ReservedTagConstants.PROGRAM_GROUP_ID, this.programRecord.getProgramGroupId());
        }
    }
    /**

    /**
     * Merges the alignment data with the non-aligned records from the source BAM file.
     */
    public void mergeAlignment() {
        // Open the file of unmapped records and write the read groups to the the header for the merged file
        final SamReader unmappedSam = SamReaderFactory.makeDefault().open(this.unmappedBamFile);

        // Check that the program record we are going to insert is not already used in the unmapped SAM
        if (getProgramRecord() != null) {
            for (final SAMProgramRecord pg : unmappedSam.getFileHeader().getProgramRecords()) {
                if (pg.getId().equals(getProgramRecord().getId())) {
                    throw new PicardException("Program Record ID already in use in unmapped BAM file.");
                }
            }
        }

        final CloseableIterator<SAMRecord> unmappedIterator = unmappedSam.iterator();
        this.header.setReadGroups(unmappedSam.getFileHeader().getReadGroups());

        int aligned = 0;
        int unmapped = 0;

        // Get the aligned records and set up the first one
        alignedIterator = new MultiHitAlignedReadIterator(new FilteringIterator(getQuerynameSortedAlignedRecords(), alignmentFilter), primaryAlignmentSelectionStrategy);
        HitsForInsert nextAligned = nextAligned();

        // Create the sorting collection that will write the records in the coordinate order
        // to the final bam file
        final SortingCollection<SAMRecord> sorted = SortingCollection.newInstance(
            SAMRecord.class, new BAMRecordCodec(header), new SAMRecordCoordinateComparator(),
            MAX_RECORDS_IN_RAM);

        while (unmappedIterator.hasNext()) {
            // Load next unaligned read or read pair.
            final SAMRecord rec = unmappedIterator.next();

            rec.setHeader(this.header);
            maybeSetPgTag(rec);

            final SAMRecord secondOfPair;
            if (rec.getReadPairedFlag()) {
                secondOfPair = unmappedIterator.next();
                secondOfPair.setHeader(this.header);
                maybeSetPgTag(secondOfPair);

                // Validate that paired reads arrive as first of pair followed by second of pair
                if (!rec.getReadName().equals(secondOfPair.getReadName()))
                    throw new PicardException("Second read from pair not found in unmapped bam: " + rec.getReadName() + ", " + secondOfPair.getReadName());

                if (!rec.getFirstOfPairFlag()) throw new PicardException("First record in unmapped bam is not first of pair: " + rec.getReadName());
                if (!secondOfPair.getReadPairedFlag())  throw new PicardException("Second record in unmapped bam is not marked as paired: " + secondOfPair.getReadName());
                if (!secondOfPair.getSecondOfPairFlag())  throw new PicardException("Second record in unmapped bam is not second of pair: " + secondOfPair.getReadName());
            }
            else {
                secondOfPair = null;
            }

            // See if there are alignments for current unaligned read or read pair.
            if (nextAligned != null && rec.getReadName().equals(nextAligned.getReadName())) {
                // If there are multiple alignments for a read (pair), then the unaligned SAMRecord must be cloned
                // before copying info from the aligned record to the unaligned.
                final boolean clone = nextAligned.numHits() > 1 || nextAligned.hasSupplementalHits();
                SAMRecord r1Primary = null, r2Primary = null;

                if (rec.getReadPairedFlag()) {
                    for (int i = 0; i < nextAligned.numHits(); ++i) {
                        // firstAligned or secondAligned may be null, if there wasn't an alignment for the end,
                        // or if the alignment was rejected by ignoreAlignment.
                        final SAMRecord firstAligned = nextAligned.getFirstOfPair(i);
                        final SAMRecord secondAligned = nextAligned.getSecondOfPair(i);

                        final boolean isPrimaryAlignment = (firstAligned != null && !firstAligned.isSecondaryOrSupplementary()) ||
                                (secondAligned != null && !secondAligned.isSecondaryOrSupplementary());

                        final SAMRecord firstToWrite;
                        final SAMRecord secondToWrite;
                        if (clone) {
                            firstToWrite = clone(rec);
                            secondToWrite = clone(secondOfPair);
                        } else {
                            firstToWrite = rec;
                            secondToWrite = secondOfPair;
                        }

                        // If these are the primary alignments then stash them for use on any supplemental alignments
                        if (isPrimaryAlignment) {
                            r1Primary = firstToWrite;
                            r2Primary = secondToWrite;
                        }

                        transferAlignmentInfoToPairedRead(firstToWrite, secondToWrite, firstAligned, secondAligned);

                        // Only write unmapped read when it has the mate info from the primary alignment.
                        if (!firstToWrite.getReadUnmappedFlag() || isPrimaryAlignment) {
                            addIfNotFiltered(sorted, firstToWrite);
                            if (firstToWrite.getReadUnmappedFlag()) ++unmapped;
                            else ++aligned;
                        }
                        if (!secondToWrite.getReadUnmappedFlag() || isPrimaryAlignment) {
                            addIfNotFiltered(sorted, secondToWrite);
                            if (!secondToWrite.getReadUnmappedFlag()) ++aligned;
                            else ++unmapped;
                        }
                    }

                    // Take all of the supplemental reads which had been stashed and add them (as appropriate) to sorted
                    for (final boolean isRead1 : new boolean[]{true,false}) {
                        final List<SAMRecord> supplementals = isRead1 ? nextAligned.getSupplementalFirstOfPairOrFragment() : nextAligned.getSupplementalSecondOfPair();
                        final SAMRecord sourceRec           = isRead1 ? rec                                                : secondOfPair;
                        final SAMRecord matePrimary         = isRead1 ? r2Primary                                          : r1Primary;

                        for (final SAMRecord supp : supplementals) {
                            final SAMRecord out = clone(sourceRec);
                            transferAlignmentInfoToFragment(out, supp);
                            if (matePrimary != null) SamPairUtil.setMateInformationOnSupplementalAlignment(out, matePrimary, addMateCigar);
                            ++aligned;
                            addIfNotFiltered(sorted, out);
                        }
                    }
                }
                else {
                    for (int i = 0; i < nextAligned.numHits(); ++i) {
                        final SAMRecord recToWrite = clone ? clone(rec) : rec;
                        transferAlignmentInfoToFragment(recToWrite, nextAligned.getFragment(i));
                        addIfNotFiltered(sorted, recToWrite);
                        if (recToWrite.getReadUnmappedFlag()) ++unmapped;
                        else ++aligned;
                    }
                    // Take all of the supplemental reads which had been stashed and add them (as appropriate) to sorted
                    for (final SAMRecord supplementalRec : nextAligned.getSupplementalFirstOfPairOrFragment()) {
                        final SAMRecord recToWrite = clone(rec);
                        transferAlignmentInfoToFragment(recToWrite, supplementalRec);
                        addIfNotFiltered(sorted, recToWrite);
                        ++aligned;
                    }
                }
                nextAligned = nextAligned();
            } else {
                // There was no alignment for this read or read pair.
                if (nextAligned != null &&
                        SAMRecordQueryNameComparator.compareReadNames(rec.getReadName(), nextAligned.getReadName()) > 0) {
                    throw new IllegalStateException("Aligned record iterator (" + nextAligned.getReadName() +
                            ") is behind the unmapped reads (" + rec.getReadName() + ")");
                }
                // No matching read from alignedIterator -- just output reads as is.
                if (!alignedReadsOnly) {
                    sorted.add(rec);
                    ++unmapped;
                    if (secondOfPair != null) {
                        sorted.add(secondOfPair);
                        ++unmapped;
                    }
                }
            }
        }
        unmappedIterator.close();
        if (alignedIterator.hasNext()) {
            throw new IllegalStateException("Reads remaining on alignment iterator: " + alignedIterator.next().getReadName() + "!");
        }
        alignedIterator.close();

        // Write the records to the output file in specified sorted order,
        header.setSortOrder(this.sortOrder);
        final boolean presorted = this.sortOrder == SortOrder.coordinate;
        final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, presorted, this.targetBamFile);
      writer.setProgressLogger(
          new ProgressLogger(log, (int) 1e7, "Wrote", "records from a sorting collection"));
        final ProgressLogger finalProgress = new ProgressLogger(log, 10000000, "Written in coordinate order to output", "records");

        for (final SAMRecord rec : sorted) {
            if (!rec.getReadUnmappedFlag()) {
                if (refSeq != null) {
                    final byte[] referenceBases = refSeq.get(sequenceDictionary.getSequenceIndex(rec.getReferenceName())).getBases();
                    rec.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(rec, referenceBases, 0, bisulfiteSequence));

                    if (rec.getBaseQualities() != SAMRecord.NULL_QUALS) {
                        rec.setAttribute(SAMTag.UQ.name(), SequenceUtil.sumQualitiesOfMismatches(rec, referenceBases, 0, bisulfiteSequence));
                    }
                }
            }
            writer.addAlignment(rec);
            finalProgress.record(rec);
        }
        writer.close();
        sorted.cleanup();

        log.info("Wrote " + aligned + " alignment records and " + (alignedReadsOnly ? 0 : unmapped) + " unmapped reads.");
    }

    /**
     * Add record if it is primary or optionally secondary.
     */
    private void addIfNotFiltered(final SortingCollection<SAMRecord> sorted, final SAMRecord rec) {
        if (includeSecondaryAlignments || !rec.getNotPrimaryAlignmentFlag()) {
            sorted.add(rec);
            this.progress.record(rec);
        }
    }

    private SAMRecord clone(final SAMRecord rec) {
        try {
            return (SAMRecord)rec.clone();
        } catch (CloneNotSupportedException e) {
            throw new PicardException("Should never happen.");
        }
    }
    /**
     * @return Next read's alignment(s) from aligned input or null, if there are no more.
     * The alignments are run through ignoreAlignment() filter before being returned, which may result
     * in an entire read being skipped if all alignments for that read should be ignored.
     */
    private HitsForInsert nextAligned() {
        if (alignedIterator.hasNext()) return alignedIterator.next();
        return null;
    }

    /**
     * Copies alignment info from aligned to unaligned read, clips as appropriate, and sets PG ID.
     * @param unaligned Original SAMRecord, and object into which values are copied.
     * @param aligned Holds alignment info that will be copied into unaligned.
     */
    private void transferAlignmentInfoToFragment(final SAMRecord unaligned, final SAMRecord aligned) {
        setValuesFromAlignment(unaligned, aligned);
        updateCigarForTrimmedOrClippedBases(unaligned, aligned);
        if (SAMUtils.cigarMapsNoBasesToRef(unaligned.getCigar())) {
            SAMUtils.makeReadUnmapped(unaligned);
        } else if (SAMUtils.recordMapsEntirelyBeyondEndOfReference(aligned)) {
            log.warn("Record mapped off end of reference; making unmapped: " + aligned);
            SAMUtils.makeReadUnmapped(unaligned);
        }
    }

    /**
     * Copies alignment info from aligned to unaligned read, if there is an alignment, and sets mate information.
     * @param firstUnaligned Original first of pair, into which alignment and pair info will be written.
     * @param secondUnaligned Original second of pair, into which alignment and pair info will be written.
     * @param firstAligned Aligned first of pair, or null if no alignment.
     * @param secondAligned Aligned second of pair, or null if no alignment.
     */
    private void transferAlignmentInfoToPairedRead(final SAMRecord firstUnaligned, final SAMRecord secondUnaligned, final SAMRecord firstAligned, final SAMRecord secondAligned) {
        if (firstAligned != null) transferAlignmentInfoToFragment(firstUnaligned, firstAligned);
        if (secondAligned != null) transferAlignmentInfoToFragment(secondUnaligned, secondAligned);
        if (isClipOverlappingReads()) clipForOverlappingReads(firstUnaligned, secondUnaligned);
        SamPairUtil.setMateInfo(secondUnaligned, firstUnaligned, header, addMateCigar);
        if (!keepAlignerProperPairFlags) {
            SamPairUtil.setProperPairFlags(secondUnaligned, firstUnaligned, expectedOrientations);
        }
    }



    /**
     * Checks to see whether the ends of the reads overlap and soft clips reads
     * them if necessary.
     */
    protected void clipForOverlappingReads(final SAMRecord read1, final SAMRecord read2) {
        // If both reads are mapped, see if we need to clip the ends due to small
        // insert size
        if (!(read1.getReadUnmappedFlag() || read2.getReadUnmappedFlag())) {

            if (read1.getReadNegativeStrandFlag() != read2.getReadNegativeStrandFlag())
            {
                final SAMRecord pos = (read1.getReadNegativeStrandFlag()) ? read2 : read1;
                final SAMRecord neg = (read1.getReadNegativeStrandFlag()) ? read1 : read2;

                // Innies only -- do we need to do anything else about jumping libraries?
                if (pos.getAlignmentStart() < neg.getAlignmentEnd()) {
                    final int posDiff = pos.getAlignmentEnd() - neg.getAlignmentEnd();
                    final int negDiff = pos.getAlignmentStart() - neg.getAlignmentStart();

                    if (posDiff > 0) {
                        CigarUtil.softClip3PrimeEndOfRead(pos, Math.min(pos.getReadLength(),
                                pos.getReadLength() - posDiff + 1));
                    }

                    if (negDiff > 0) {
                        CigarUtil.softClip3PrimeEndOfRead(neg, Math.min(neg.getReadLength(),
                                neg.getReadLength() - negDiff + 1));
                    }

                }
            }
            else {
                // TODO: What about RR/FF pairs?
            }
         }

    }

    /**
     * Sets the values from the alignment record on the unaligned BAM record.  This
     * preserves all data from the unaligned record (ReadGroup, NoiseRead status, etc)
     * and adds all the alignment info
     *
     * @param rec           The unaligned read record
     * @param alignment     The alignment record
     */
    protected void setValuesFromAlignment(final SAMRecord rec, final SAMRecord alignment) {
        for (final SAMRecord.SAMTagAndValue attr : alignment.getAttributes()) {
            // Copy over any non-reserved attributes.  attributesToRemove overrides attributesToRetain.
            if ((!isReservedTag(attr.tag) || this.attributesToRetain.contains(attr.tag)) && !this.attributesToRemove.contains(attr.tag)) {
               rec.setAttribute(attr.tag, attr.value);
            }
        }
        rec.setReadUnmappedFlag(alignment.getReadUnmappedFlag());

        // Note that it is important to get reference names rather than indices in case the sequence dictionaries
        // in the two files are in different orders.
        rec.setReferenceName(alignment.getReferenceName());

        rec.setAlignmentStart(alignment.getAlignmentStart());
        rec.setReadNegativeStrandFlag(alignment.getReadNegativeStrandFlag());
        rec.setNotPrimaryAlignmentFlag(alignment.getNotPrimaryAlignmentFlag());
        rec.setSupplementaryAlignmentFlag(alignment.getSupplementaryAlignmentFlag());
        if (!alignment.getReadUnmappedFlag()) {
            // only aligned reads should have cigar and mapping quality set
            rec.setCigar(alignment.getCigar())// cigar may change when a
                                                 // clipCigar called below
            rec.setMappingQuality(alignment.getMappingQuality());
        }
        if (rec.getReadPairedFlag()) {
            rec.setProperPairFlag(alignment.getProperPairFlag());
            // Mate info and alignment size will get set by the ClippedPairFixer.
        }

        // If it's on the negative strand, reverse complement the bases
        // and reverse the order of the qualities
        if (rec.getReadNegativeStrandFlag()) {
            SAMRecordUtil.reverseComplement(rec);
        }

    }

    private static Cigar createNewCigarIfMapsOffEndOfReference(SAMFileHeader header,
                                                              boolean isUnmapped,
                                                              int referenceIndex,
                                                              int alignmentEnd,
                                                              int readLength,
                                                              Cigar oldCigar) {
        Cigar newCigar = null;
        if (!isUnmapped) {
            final SAMSequenceRecord refseq = header.getSequence(referenceIndex);
            final int overhang = alignmentEnd - refseq.getSequenceLength();
            if (overhang > 0) {
                // 1-based index of first base in read to clip.
                int clipFrom = readLength - overhang + 1;
                // we have to check if the last element is soft-clipping, so we can subtract that from clipFrom
                final CigarElement cigarElement = oldCigar.getCigarElement(oldCigar.getCigarElements().size()-1);
                if (CigarOperator.SOFT_CLIP == cigarElement.getOperator()) clipFrom -= cigarElement.getLength();
                final List<CigarElement> newCigarElements  = CigarUtil.softClipEndOfRead(clipFrom, oldCigar.getCigarElements());
                newCigar = new Cigar(newCigarElements);
            }
        }
        return newCigar;
    }

    /**
     * Soft-clip an alignment that hangs off the end of its reference sequence.  Checks both the read and its mate,
     * if available.
     * @param rec
     */
    public static void createNewCigarsIfMapsOffEndOfReference(final SAMRecord rec) {
        // If the read maps off the end of the alignment, clip it
        if (!rec.getReadUnmappedFlag()) {
            final Cigar readCigar = createNewCigarIfMapsOffEndOfReference(rec.getHeader(),
                    rec.getReadUnmappedFlag(),
                    rec.getReferenceIndex(),
                    rec.getAlignmentEnd(),
                    rec.getReadLength(),
                    rec.getCigar());
            if (null != readCigar) rec.setCigar(readCigar);
        }

        // If the read's mate maps off the end of the alignment, clip it
        if (SAMUtils.hasMateCigar(rec)) {
            Cigar mateCigar = SAMUtils.getMateCigar(rec);
            mateCigar = createNewCigarIfMapsOffEndOfReference(rec.getHeader(),
                    rec.getMateUnmappedFlag(),
                    rec.getMateReferenceIndex(),
                    SAMUtils.getMateAlignmentEnd(rec), // NB: this could be computed without another call to getMateCigar
                    mateCigar.getReadLength(),
                    mateCigar);
            if (null != mateCigar) rec.setAttribute(SAMTag.MC.name(), mateCigar.toString());
        }
    }

    protected void updateCigarForTrimmedOrClippedBases(final SAMRecord rec, final SAMRecord alignment) {
        // If the read was trimmed or not all the bases were sent for alignment, clip it
        final int alignmentReadLength = alignment.getReadLength();
        final int originalReadLength = rec.getReadLength();
        final int trimmed = (!rec.getReadPairedFlag()) || rec.getFirstOfPairFlag()
                ? this.read1BasesTrimmed != null ? this.read1BasesTrimmed : 0
                : this.read2BasesTrimmed != null ? this.read2BasesTrimmed : 0;
        final int notWritten = originalReadLength - (alignmentReadLength + trimmed);
      
        // Update cigar if the mate maps off the reference
        createNewCigarsIfMapsOffEndOfReference(rec);

        rec.setCigar(CigarUtil.addSoftClippedBasesToEndsOfCigar(
            rec.getCigar(), rec.getReadNegativeStrandFlag(), notWritten, trimmed));

        // If the adapter sequence is marked and clipAdapter is true, clip it
        if (this.clipAdapters && rec.getAttribute(ReservedTagConstants.XT) != null){
            CigarUtil.softClip3PrimeEndOfRead(rec, rec.getIntegerAttribute(ReservedTagConstants.XT));
        }
    }


    protected SAMSequenceDictionary getSequenceDictionary() { return this.sequenceDictionary; }

    protected SAMProgramRecord getProgramRecord() { return this.programRecord; }

    protected void setProgramRecord(final SAMProgramRecord pg ) {
        if (this.programRecord != null) {
            throw new IllegalStateException("Cannot set program record more than once on alignment merger.");
        }
        this.programRecord = pg;
        this.header.addProgramRecord(pg);
        SAMUtils.chainSAMProgramRecord(header, pg);
    }

    protected boolean isReservedTag(final String tag) {
        final char firstCharOfTag = tag.charAt(0);

        // All tags that start with a lower-case letter are user defined and should not be overridden by aligner
        // unless explicitly specified in attributesToRetain.
        if (Character.isLowerCase(firstCharOfTag)) return true;

        for (final char c : RESERVED_ATTRIBUTE_STARTS) {
            if (firstCharOfTag == c) return true;
        }
        return false;
    }

    protected SAMFileHeader getHeader() { return this.header; }

    protected void resetRefSeqFileWalker() {
        this.refSeq = new ReferenceSequenceFileWalker(referenceFasta);
    }

    public boolean isClipOverlappingReads() {
        return clipOverlappingReads;
    }

    public void setClipOverlappingReads(final boolean clipOverlappingReads) {
        this.clipOverlappingReads = clipOverlappingReads;
    }

    public boolean isKeepAlignerProperPairFlags() {
        return keepAlignerProperPairFlags;
    }

    /**
     * If true, keep the aligner's idea of proper pairs rather than letting alignment merger decide.
     */
    public void setKeepAlignerProperPairFlags(final boolean keepAlignerProperPairFlags) {
        this.keepAlignerProperPairFlags = keepAlignerProperPairFlags;
    }

    public void setIncludeSecondaryAlignments(final boolean includeSecondaryAlignments) {
        this.includeSecondaryAlignments = includeSecondaryAlignments;
    }

    public void close() {
        CloserUtil.close(this.refSeq);
    }
}
TOP

Related Classes of picard.sam.AbstractAlignmentMerger

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.