Package picard.sam

Source Code of picard.sam.FilterSamReads

/*
* The MIT License
*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

/**
* $Id$
*/
package picard.sam;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.filter.AlignedFilter;
import htsjdk.samtools.filter.FilteringIterator;
import htsjdk.samtools.filter.ReadNameFilter;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.SamOrBam;

import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.text.DecimalFormat;

/**
* From a SAM or BAM file, produce a new SAM or BAM by filtering aligned reads or a list of read
* names provided in a file (one readname per line)
* <p/>
* $Id$
*/
@CommandLineProgramProperties(
        usage = "Produces a new SAM or BAM file by including or excluding aligned reads " +
                "or a list of reads names supplied in the READ_LIST_FILE from the INPUT SAM or BAM file.\n",
        usageShort = "Creates a new SAM or BAM file by including or excluding aligned reads",
        programGroup = SamOrBam.class
)
public class FilterSamReads extends CommandLineProgram {

    private static final Log log = Log.getInstance(FilterSamReads.class);

    private static enum Filter {
        includeAligned("OUTPUT SAM/BAM will contain aligned reads only. INPUT SAM/BAM must be in queryname SortOrder. (Note that *both* first and second of paired reads must be aligned to be included in the OUTPUT SAM or BAM)"),
        excludeAligned("OUTPUT SAM/BAM will contain un-mapped reads only. INPUT SAM/BAM must be in queryname SortOrder. (Note that *both* first and second of pair must be aligned to be excluded from the OUTPUT SAM or BAM)"),
        includeReadList("OUTPUT SAM/BAM will contain reads that are supplied in the READ_LIST_FILE file"),
        excludeReadList("OUTPUT bam will contain reads that are *not* supplied in the READ_LIST_FILE file");

        private final String description;

        Filter(final String description) {
            this.description = description;
        }

        @Override
        public String toString() {
            return this.name() + " [" + description + "]";
        }
    }

    @Option(doc = "The SAM or BAM file that will be filtered.",
        optional = false,
        shortName = StandardOptionDefinitions.INPUT_SHORT_NAME)
    public File INPUT;

    @Option(doc = "Filter.", optional = false)
    public Filter FILTER = null;

    @Option(doc = "Read List File containing reads that will be included or excluded from the OUTPUT SAM or BAM file.",
            optional = true,
            shortName = "RLF")
    public File READ_LIST_FILE;

    @Option(
        doc = "SortOrder of the OUTPUT SAM or BAM file, otherwise use the SortOrder of the INPUT file.",
        optional = true, shortName = "SO")
    public SAMFileHeader.SortOrder SORT_ORDER;

    @Option(
        doc = "Create .reads files (for debugging purposes)",
        optional = true)
    public boolean WRITE_READS_FILES = true;

    @Option(doc = "SAM or BAM file to write read excluded results to",
        optional = false, shortName = "O")
    public File OUTPUT;

    private void filterReads(final FilteringIterator filteringIterator) {

        // get OUTPUT header from INPUT and owerwrite it if necessary
        final SAMFileReader inputReader = new SAMFileReader(INPUT);
        final SAMFileHeader.SortOrder inputSortOrder = inputReader.getFileHeader().getSortOrder();
        final SAMFileHeader outputHeader = inputReader.getFileHeader();
        if (SORT_ORDER != null) {
            outputHeader.setSortOrder(SORT_ORDER);
        }
        final boolean presorted = inputSortOrder.equals(outputHeader.getSortOrder());
        log.info("Filtering [presorted=" + presorted + "] " + INPUT.getName() + " -> OUTPUT=" +
            OUTPUT.getName() + " [sortorder=" + outputHeader.getSortOrder().name() + "]");

        // create OUTPUT file
        final SAMFileWriter outputWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(outputHeader, presorted, OUTPUT);

        final ProgressLogger progress = new ProgressLogger(log, (int) 1e6, "Written");
       
        while (filteringIterator.hasNext()) {
            final SAMRecord rec = filteringIterator.next();
            outputWriter.addAlignment(rec);
            progress.record(rec);
        }

        filteringIterator.close();
        outputWriter.close();
        inputReader.close();
        log.info(new DecimalFormat("#,###").format(progress.getCount()) + " SAMRecords written to " + OUTPUT.getName());
    }

    /**
     * Write out a file of read names for debugging purposes.
     *
     * @param samOrBamFile The SAM or BAM file for which we are going to write out a file of its
     *                     containing read names
     */
    private void writeReadsFile(final File samOrBamFile) throws IOException {
        final SAMFileReader reader = new SAMFileReader(samOrBamFile);
        final File readsFile =
            new File(OUTPUT.getParentFile(), IOUtil.basename(samOrBamFile) + ".reads");
        IOUtil.assertFileIsWritable(readsFile);
        final BufferedWriter bw = IOUtil.openFileForBufferedWriting(readsFile, false);

        for (final SAMRecord rec : reader) {
            bw.write(rec.toString() + "\n");
        }

        bw.close();
        reader.close();
        IOUtil.assertFileIsReadable(readsFile);
    }

    @Override
    protected int doWork() {

        try {
            IOUtil.assertFileIsReadable(INPUT);
            IOUtil.assertFileIsWritable(OUTPUT);
            if (WRITE_READS_FILES) writeReadsFile(INPUT);

            switch (FILTER) {
                case includeAligned:
                    filterReads(new FilteringIterator(new SAMFileReader(INPUT).iterator(),
                    new AlignedFilter(true), true));
                    break;
                case excludeAligned:
                    filterReads(new FilteringIterator(new SAMFileReader(INPUT).iterator(),
                    new AlignedFilter(false), true));
                    break;
                case includeReadList:
                    filterReads(new FilteringIterator(new SAMFileReader(INPUT).iterator(),
                    new ReadNameFilter(READ_LIST_FILE, true)));
                    break;
                case excludeReadList:
                    filterReads(new FilteringIterator(new SAMFileReader(INPUT).iterator(),
                    new ReadNameFilter(READ_LIST_FILE, false)));
                    break;
                default:
                    throw new UnsupportedOperationException(FILTER.name() + " has not been implemented!");
            }

            IOUtil.assertFileIsReadable(OUTPUT);
            if (WRITE_READS_FILES) writeReadsFile(OUTPUT);
            return 0;

        } catch (Exception e) {
            if (OUTPUT.exists() && !OUTPUT.delete()) {
                log.warn("Failed to delete " + OUTPUT.getAbsolutePath());
            }

            log.error(e, "Failed to filter " + INPUT.getName());
            return 1;
        }
    }

    @Override
    protected String[] customCommandLineValidation() {
        if (INPUT.equals(OUTPUT)) {
            return new String[]{"INPUT file and OUTPUT file must differ!"};
        }

        if ((FILTER.equals(Filter.includeReadList) ||
                FILTER.equals(Filter.excludeReadList)) &&
                READ_LIST_FILE == null) {
            return new String[]{"A READ_LIST_FILE must be specified when using the " + FILTER.name() + " option"};

        }

        return super.customCommandLineValidation();
    }

    /**
     * Stock main method.
     *
     * @param args main arguments
     */
    public static void main(final String[] args) {
        System.exit(new FilterSamReads().instanceMain(args));
    }

}
TOP

Related Classes of picard.sam.FilterSamReads

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.