Package picard.sam

Source Code of picard.sam.RevertOriginalBaseQualitiesAndAddMateCigar

package picard.sam;

import htsjdk.samtools.BAMRecordCodec;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordQueryNameComparator;
import htsjdk.samtools.SAMUtils;
import htsjdk.samtools.SamPairUtil;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.SortingCollection;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.SamOrBam;

import java.io.File;
import java.util.Iterator;

/**
* This tool reverts the original base qualities (if specified) and adds the mate cigar tag to mapped BAMs.
* If the file does not have OQs and already has mate cigar tags, nothing is done.
* New BAM/BAI/MD5 files are created.
* @author Nils Homer
*/
@CommandLineProgramProperties(
        usage = "Reverts the original base qualities and adds the mate cigar tag to read-group BAMs.",
        usageShort = "Reverts the original base qualities and adds the mate cigar tag to read-group BAMs",
        programGroup = SamOrBam.class
)
public class RevertOriginalBaseQualitiesAndAddMateCigar extends CommandLineProgram {

    @Option(shortName= StandardOptionDefinitions.INPUT_SHORT_NAME, doc="The input SAM/BAM file to revert the state of.")
    public File INPUT;

    @Option(shortName=StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc="The output SAM/BAM file to create.")
    public File OUTPUT;

     @Option(shortName="SO", doc="The sort order to create the reverted output file with."
            + "By default, the sort order will be the same as the input.", optional = true)
    public SAMFileHeader.SortOrder SORT_ORDER = null;

    @Option(shortName=StandardOptionDefinitions.USE_ORIGINAL_QUALITIES_SHORT_NAME, doc="True to restore original" +
            " qualities from the OQ field to the QUAL field if available.")
    public boolean RESTORE_ORIGINAL_QUALITIES = true;

    @Option(doc="The maximum number of records to examine to determine if we can exit early and not output, given that"
            + " there are a no original base qualities (if we are to restore) and mate cigars exist."
            + " Set to 0 to never skip the file.")
    public int MAX_RECORDS_TO_EXAMINE = 10000;

    private final static Log log = Log.getInstance(RevertOriginalBaseQualitiesAndAddMateCigar.class);

    public RevertOriginalBaseQualitiesAndAddMateCigar() {
        this.CREATE_INDEX = true;
        this.CREATE_MD5_FILE = true;
    }

    /** Default main method impl. */
    public static void main(final String[] args) {
        new RevertOriginalBaseQualitiesAndAddMateCigar().instanceMainWithExit(args);
    }

    public int doWork() {
        IOUtil.assertFileIsReadable(INPUT);
        IOUtil.assertFileIsWritable(OUTPUT);

        boolean foundPairedMappedReads = false;

        // Check if we can skip this file since it does not have OQ tags and the mate cigar tag is already there.
        final CanSkipSamFile skipSamFile = RevertOriginalBaseQualitiesAndAddMateCigar.canSkipSAMFile(INPUT, MAX_RECORDS_TO_EXAMINE, RESTORE_ORIGINAL_QUALITIES);
        log.info(skipSamFile.getMessage(MAX_RECORDS_TO_EXAMINE));
        if (skipSamFile.canSkip()) return 0;

        final SAMFileReader in = new SAMFileReader(INPUT, true);
        final SAMFileHeader inHeader = in.getFileHeader();

        // Build the output writer based on the correct sort order
        final SAMFileHeader outHeader = inHeader.clone();
        if (null == SORT_ORDER) this.SORT_ORDER = inHeader.getSortOrder(); // same as the input
        outHeader.setSortOrder(SORT_ORDER);
        SAMFileWriterFactory.setDefaultCreateIndexWhileWriting(CREATE_INDEX);
        SAMFileWriterFactory.setDefaultCreateMd5File(CREATE_MD5_FILE);
        final SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(outHeader, false, OUTPUT);

        // Iterate over the records, revert original base qualities, and push them into a SortingCollection by queryname
        final SortingCollection<SAMRecord> sorter = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(outHeader),
                new SAMRecordQueryNameComparator(), MAX_RECORDS_IN_RAM);
        final ProgressLogger revertingProgress = new ProgressLogger(log, 1000000, " reverted OQs");
        int numOriginalQualitiesRestored = 0;
        for (final SAMRecord record : in) {
            // Clean up reads that map off the end of the reference
            AbstractAlignmentMerger.createNewCigarsIfMapsOffEndOfReference(record);

            if (RESTORE_ORIGINAL_QUALITIES && null != record.getOriginalBaseQualities()) {
                // revert the original base qualities
                record.setBaseQualities(record.getOriginalBaseQualities());
                record.setOriginalBaseQualities(null);
                numOriginalQualitiesRestored++;
            }
            if (!foundPairedMappedReads && record.getReadPairedFlag() && !record.getReadUnmappedFlag()) foundPairedMappedReads = true;
            revertingProgress.record(record);
            sorter.add(record);
        }
        CloserUtil.close(in);
        log.info("Reverted the original base qualities for " + numOriginalQualitiesRestored + " records");

        /**
         * Iterator through sorting collection output
         * 1. Set mate cigar string and mate information
         * 2. push record into SAMFileWriter to the output
         */
        final SamPairUtil.SetMateInfoIterator sorterIterator = new SamPairUtil.SetMateInfoIterator(sorter.iterator(), true);
        final ProgressLogger sorterProgress = new ProgressLogger(log, 1000000, " mate cigars added");
        while (sorterIterator.hasNext()) {
            final SAMRecord record = sorterIterator.next();
            out.addAlignment(record);
            sorterProgress.record(record);
        }
        sorterIterator.close();
        CloserUtil.close(out);
        log.info("Updated " + sorterIterator.getNumMateCigarsAdded() + " records with mate cigar");
        if (!foundPairedMappedReads) log.info("Did not find any paired mapped reads.");

        return 0;
    }

    /**
     * Used as a return for the canSkipSAMFile function.
     */
    public enum CanSkipSamFile {
        CAN_SKIP("Can skip the BAM file", true),
        CANNOT_SKIP_FOUND_OQ("Cannot skip the BAM as we found a record with an OQ", false),
        CANNOT_SKIP_FOUND_NO_MC("Cannot skip the BAM as we found a mate with no mate cigar tag", false),
        FOUND_NO_EVIDENCE("Found no evidence of OQ or mate with no mate cigar in the first %d records.  Will continue...", false);
        final private String format;
        final private boolean skip;

        private CanSkipSamFile(final String format, final boolean skip) {
            this.format = format;
            this.skip = skip;
        }

        public String getMessage(final int maxRecordsToExamine) { return String.format(this.format, maxRecordsToExamine); }
        public boolean canSkip() { return this.skip; }
    }

    /**
     * Checks if we can skip the SAM/BAM file when reverting origin base qualities and adding mate cigars.
     * @param inputFile the SAM/BAM input file
     * @param maxRecordsToExamine the maximum number of records to examine before quitting
     * @param revertOriginalBaseQualities true if we are to revert original base qualities, false otherwise
     * @return whether we can skip or not, and the explanation why.
     */
    public static CanSkipSamFile canSkipSAMFile(final File inputFile, final int maxRecordsToExamine, final boolean revertOriginalBaseQualities)  {
        final SAMFileReader in = new SAMFileReader(inputFile, true);
        final Iterator<SAMRecord> iterator = in.iterator();
        int numRecordsExamined = 0;
        CanSkipSamFile returnType = CanSkipSamFile.FOUND_NO_EVIDENCE;

        while (iterator.hasNext() && numRecordsExamined < maxRecordsToExamine) {
            final SAMRecord record = iterator.next();

            if (revertOriginalBaseQualities && null != record.getOriginalBaseQualities()) {
                // has OQ, break and return case #2
                returnType = CanSkipSamFile.CANNOT_SKIP_FOUND_OQ;
                break;
            }

            // check if mate pair and its mate is mapped
            if (record.getReadPairedFlag() && !record.getMateUnmappedFlag()) {
                if (null == SAMUtils.getMateCigar(record)) {
                    // has no MC, break and return case #2
                    returnType = CanSkipSamFile.CANNOT_SKIP_FOUND_NO_MC;
                    break;
                }
                else {
                    // has MC, previously checked that it does not have OQ, break and return case #1
                    returnType = CanSkipSamFile.CAN_SKIP;
                    break;
                }
            }

            numRecordsExamined++;
        }

        // no more records anyhow, so we can skip
        if (!iterator.hasNext() && CanSkipSamFile.FOUND_NO_EVIDENCE == returnType) {
            returnType = CanSkipSamFile.CAN_SKIP;
        }

        in.close();

        return returnType;
    }
}
TOP

Related Classes of picard.sam.RevertOriginalBaseQualitiesAndAddMateCigar

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.