Package htsjdk.samtools.util

Examples of htsjdk.samtools.util.ProgressLogger


                new PairedReadComparator(),
                MAX_RECORDS_IN_RAM,
                TMP_DIR);

        // Loop through the input files and pick out the read sequences etc.
        final ProgressLogger progress = new ProgressLogger(log, (int) 1e6, "Read");
        for (final File f : INPUT) {
            final Map<String, PairedReadSequence> pendingByName = new HashMap<String, PairedReadSequence>();
            final SAMFileReader in = new SAMFileReader(f);
            readGroups.addAll(in.getFileHeader().getReadGroups());

            for (final SAMRecord rec : in) {
                if (!rec.getReadPairedFlag()) continue;
                if (!rec.getFirstOfPairFlag() && !rec.getSecondOfPairFlag()) {
                    continue;
                }

                PairedReadSequence prs = pendingByName.remove(rec.getReadName());
                if (prs == null) {
                    // Make a new paired read object and add RG and physical location information to it
                    prs = new PairedReadSequence();
                    if (opticalDuplicateFinder.addLocationInformation(rec.getReadName(), prs)) {
                        final SAMReadGroupRecord rg = rec.getReadGroup();
                        if (rg != null) prs.setReadGroup((short) readGroups.indexOf(rg));
                    }

                    pendingByName.put(rec.getReadName(), prs);
                }

                // Read passes quality check if both ends meet the mean quality criteria
                final boolean passesQualityCheck = passesQualityCheck(rec.getReadBases(),
                        rec.getBaseQualities(),
                        MIN_IDENTICAL_BASES,
                        MIN_MEAN_QUALITY);
                prs.qualityOk = prs.qualityOk && passesQualityCheck;

                // Get the bases and restore them to their original orientation if necessary
                final byte[] bases = rec.getReadBases();
                if (rec.getReadNegativeStrandFlag()) SequenceUtil.reverseComplement(bases);

                if (rec.getFirstOfPairFlag()) {
                    prs.read1 = bases;
                } else {
                    prs.read2 = bases;
                }

                if (prs.read1 != null && prs.read2 != null && prs.qualityOk) {
                    sorter.add(prs);
                }

                progress.record(rec);
            }
        }

        log.info("Finished reading - moving on to scanning for duplicates.");
View Full Code Here


     * readable and that the output file can be written to.  Then iterates through
     * all the records accumulating metrics.  Finally writes metrics file
     */
    protected int doWork() {
        final Log log = Log.getInstance(getClass());
        final ProgressLogger progress = new ProgressLogger(log);

        // Some quick parameter checking
        IOUtil.assertFileIsReadable(INPUT);
        IOUtil.assertFileIsWritable(OUTPUT);

        log.info("Reading input file and calculating metrics.");

        final SAMFileReader sam = new SAMFileReader(IOUtil.openFileForReading(INPUT));

        final MetricsFile<QualityYieldMetrics,Integer> metricsFile = getMetricsFile();
        final QualityYieldMetrics metrics = new QualityYieldMetrics();

        for (final SAMRecord rec : sam) {
            metrics.TOTAL_READS ++;
            final int length = rec.getReadLength();

            final boolean isPfRead = !rec.getReadFailsVendorQualityCheckFlag();
            if (isPfRead){
                metrics.PF_READS ++;
                metrics.PF_BASES += length;
            }

            metrics.TOTAL_BASES += length;

            final byte[] quals;
            if (USE_ORIGINAL_QUALITIES) {
                byte[] tmp = rec.getOriginalBaseQualities();
                if (tmp == null) tmp = rec.getBaseQualities();
                quals = tmp;
            }
            else {
                quals = rec.getBaseQualities();
            }

            // add up quals, and quals >= 20
            for (int i=0; i<quals.length; ++i) {
                metrics.Q20_EQUIVALENT_YIELD += quals[i];
                if (quals[i] >= 20) metrics.Q20_BASES++;
                if (quals[i] >= 30) metrics.Q30_BASES++;

                if (isPfRead){
                    metrics.PF_Q20_EQUIVALENT_YIELD += quals[i];
                    if (quals[i] >= 20) metrics.PF_Q20_BASES++;
                    if (quals[i] >= 30) metrics.PF_Q30_BASES++;
                }
            }

            progress.record(rec);
        }
       
        metrics.READ_LENGTH = metrics.TOTAL_READS == 0 ? 0 : (int) (metrics.TOTAL_BASES / metrics.TOTAL_READS);
        metrics.Q20_EQUIVALENT_YIELD =  metrics.Q20_EQUIVALENT_YIELD / 20 ;
        metrics.PF_Q20_EQUIVALENT_YIELD = metrics.PF_Q20_EQUIVALENT_YIELD / 20 ;
View Full Code Here

        }
        try {
            final SAMFileReader reader = new SAMFileReader(INPUT);
            final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), true, OUTPUT);
            final CloseableIterator<SAMRecord> it = reader.iterator();
            final ProgressLogger progress = new ProgressLogger(Log.getInstance(CleanSam.class));

            // If the read (or its mate) maps off the end of the alignment, clip it
            while(it.hasNext()) {
                final SAMRecord rec = it.next();

                // If the read (or its mate) maps off the end of the alignment, clip it
                AbstractAlignmentMerger.createNewCigarsIfMapsOffEndOfReference(rec);

                // check the read's mapping quality
                if (rec.getReadUnmappedFlag() && 0 != rec.getMappingQuality()) {
                    rec.setMappingQuality(0);
                }

                writer.addAlignment(rec);
                progress.record(rec);
            }

            writer.close();
            it.close();
        }
View Full Code Here

        final Map<String,Boolean> decisions = new HashMap<String,Boolean>();

        long total = 0;
        long kept  = 0;
       
        final ProgressLogger progress = new ProgressLogger(log, (int) 1e7, "Read");

        for (final SAMRecord rec : in) {
            if (rec.isSecondaryOrSupplementary()) continue;
            ++total;

            final String key = rec.getReadName();
            final Boolean previous = decisions.remove(key);
            final boolean keeper;

            if (previous == null) {
                keeper = r.nextDouble() <= PROBABILITY;
                if (rec.getReadPairedFlag()) decisions.put(key, keeper);
            }
            else {
                keeper = previous;
            }

            if (keeper) {
                out.addAlignment(rec);
                ++kept;
            }

            progress.record(rec);
        }

        out.close();
        log.info("Finished! Kept " + kept + " out of " + total + " reads.");
View Full Code Here

        // Now copy over the file while marking all the necessary indexes as duplicates
        long recordInFileIndex = 0;
        long nextDuplicateIndex = (this.duplicateIndexes.hasNext() ? this.duplicateIndexes.next() : -1);

        final ProgressLogger progress = new ProgressLogger(log, (int) 1e7, "Written");
        final CloseableIterator<SAMRecord> iterator = headerAndIterator.iterator;
        while (iterator.hasNext()) {
            final SAMRecord rec = iterator.next();
            if (!rec.isSecondaryOrSupplementary()) {
                final String library = libraryIdGenerator.getLibraryName(header, rec);
                DuplicationMetrics metrics = libraryIdGenerator.getMetricsByLibrary(library);
                if (metrics == null) {
                    metrics = new DuplicationMetrics();
                    metrics.LIBRARY = library;
                    libraryIdGenerator.addMetricsByLibrary(library, metrics);
                }

                // First bring the simple metrics up to date
                if (rec.getReadUnmappedFlag()) {
                    ++metrics.UNMAPPED_READS;
                } else if (!rec.getReadPairedFlag() || rec.getMateUnmappedFlag()) {
                    ++metrics.UNPAIRED_READS_EXAMINED;
                } else {
                    ++metrics.READ_PAIRS_EXAMINED; // will need to be divided by 2 at the end
                }


                if (recordInFileIndex == nextDuplicateIndex) {
                    rec.setDuplicateReadFlag(true);

                    // Update the duplication metrics
                    if (!rec.getReadPairedFlag() || rec.getMateUnmappedFlag()) {
                        ++metrics.UNPAIRED_READ_DUPLICATES;
                    } else {
                        ++metrics.READ_PAIR_DUPLICATES;// will need to be divided by 2 at the end
                    }

                    // Now try and figure out the next duplicate index
                    if (this.duplicateIndexes.hasNext()) {
                        nextDuplicateIndex = this.duplicateIndexes.next();
                    } else {
                        // Only happens once we've marked all the duplicates
                        nextDuplicateIndex = -1;
                    }
                } else {
                    rec.setDuplicateReadFlag(false);
                }
            }
            recordInFileIndex++;

            if (!this.REMOVE_DUPLICATES || !rec.getDuplicateReadFlag()) {
                if (PROGRAM_RECORD_ID != null) {
                    rec.setAttribute(SAMTag.PG.name(), chainedPgIds.get(rec.getStringAttribute(SAMTag.PG.name())));
                }
                out.addAlignment(rec);
                progress.record(rec);
            }
        }

        // remember to close the inputs
        iterator.close();
View Full Code Here

        final SamHeaderAndIterator headerAndIterator = openInputs();
        final SAMFileHeader header = headerAndIterator.header;
        final ReadEndsForMarkDuplicatesMap tmp = new DiskBasedReadEndsForMarkDuplicatesMap(MAX_FILE_HANDLES_FOR_READ_ENDS_MAP);
        long index = 0;
        final ProgressLogger progress = new ProgressLogger(log, (int) 1e6, "Read");
        final CloseableIterator<SAMRecord> iterator = headerAndIterator.iterator;

        if (null == this.libraryIdGenerator) {
            this.libraryIdGenerator = new LibraryIdGenerator(header);
        }

        while (iterator.hasNext()) {
            final SAMRecord rec = iterator.next();

            // This doesn't have anything to do with building sorted ReadEnd lists, but it can be done in the same pass
            // over the input
            if (PROGRAM_RECORD_ID != null) {
                // Gather all PG IDs seen in merged input files in first pass.  These are gathered for two reasons:
                // - to know how many different PG records to create to represent this program invocation.
                // - to know what PG IDs are already used to avoid collisions when creating new ones.
                // Note that if there are one or more records that do not have a PG tag, then a null value
                // will be stored in this set.
                pgIdsSeen.add(rec.getStringAttribute(SAMTag.PG.name()));
            }

            if (rec.getReadUnmappedFlag()) {
                if (rec.getReferenceIndex() == -1) {
                    // When we hit the unmapped reads with no coordinate, no reason to continue.
                    break;
                }
                // If this read is unmapped but sorted with the mapped reads, just skip it.
            } else if (!rec.isSecondaryOrSupplementary()) {
                final ReadEndsForMarkDuplicates fragmentEnd = buildReadEnds(header, index, rec);
                this.fragSort.add(fragmentEnd);

                if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
                    final String key = rec.getAttribute(ReservedTagConstants.READ_GROUP_ID) + ":" + rec.getReadName();
                    ReadEndsForMarkDuplicates pairedEnds = tmp.remove(rec.getReferenceIndex(), key);

                    // See if we've already seen the first end or not
                    if (pairedEnds == null) {
                        pairedEnds = buildReadEnds(header, index, rec);
                        tmp.put(pairedEnds.read2ReferenceIndex, key, pairedEnds);
                    } else {
                        final int sequence = fragmentEnd.read1ReferenceIndex;
                        final int coordinate = fragmentEnd.read1Coordinate;

                        // Set orientationForOpticalDuplicates, which always goes by the first then the second end for the strands.  NB: must do this
                        // before updating the orientation later.
                        if (rec.getFirstOfPairFlag()) {
                            pairedEnds.orientationForOpticalDuplicates = ReadEnds.getOrientationByte(rec.getReadNegativeStrandFlag(), pairedEnds.orientation == ReadEnds.R);
                        } else {
                            pairedEnds.orientationForOpticalDuplicates = ReadEnds.getOrientationByte(pairedEnds.orientation == ReadEnds.R, rec.getReadNegativeStrandFlag());
                        }

                        // If the second read is actually later, just add the second read data, else flip the reads
                        if (sequence > pairedEnds.read1ReferenceIndex ||
                                (sequence == pairedEnds.read1ReferenceIndex && coordinate >= pairedEnds.read1Coordinate)) {
                            pairedEnds.read2ReferenceIndex = sequence;
                            pairedEnds.read2Coordinate = coordinate;
                            pairedEnds.read2IndexInFile = index;
                            pairedEnds.orientation = ReadEnds.getOrientationByte(pairedEnds.orientation == ReadEnds.R,
                                    rec.getReadNegativeStrandFlag());
                        } else {
                            pairedEnds.read2ReferenceIndex = pairedEnds.read1ReferenceIndex;
                            pairedEnds.read2Coordinate = pairedEnds.read1Coordinate;
                            pairedEnds.read2IndexInFile = pairedEnds.read1IndexInFile;
                            pairedEnds.read1ReferenceIndex = sequence;
                            pairedEnds.read1Coordinate = coordinate;
                            pairedEnds.read1IndexInFile = index;
                            pairedEnds.orientation = ReadEnds.getOrientationByte(rec.getReadNegativeStrandFlag(),
                                    pairedEnds.orientation == ReadEnds.R);
                        }

                        pairedEnds.score += DuplicateScoringStrategy.computeDuplicateScore(rec, this.DUPLICATE_SCORING_STRATEGY);
                        this.pairSort.add(pairedEnds);
                    }
                }
            }

            // Print out some stats every 1m reads
            ++index;
            if (progress.record(rec)) {
                log.info("Tracking " + tmp.size() + " as yet unmatched pairs. " + tmp.sizeInRam() + " records in RAM.");
            }
        }

        log.info("Read " + index + " records. " + tmp.size() + " pairs never matched.");
View Full Code Here

        IOUtil.assertFileIsReadable(INPUT);
        IOUtil.assertFileIsWritable(OUTPUT);
        IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);

        // Setup all the inputs
        final ProgressLogger progress = new ProgressLogger(log, 10000000, "Processed", "loci");
        final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
        final SAMFileReader in        = new SAMFileReader(INPUT);

        final SamLocusIterator iterator = new SamLocusIterator(in);
        final List<SamRecordFilter> filters   = new ArrayList<SamRecordFilter>();
        final CountingFilter dupeFilter       = new CountingDuplicateFilter();
        final CountingFilter mapqFilter       = new CountingMapQFilter(MINIMUM_MAPPING_QUALITY);
        final CountingPairedFilter pairFilter = new CountingPairedFilter();
        filters.add(mapqFilter);
        filters.add(dupeFilter);
        filters.add(pairFilter);
        filters.add(new SecondaryAlignmentFilter()); // Not a counting filter because we never want to count reads twice
        iterator.setSamFilters(filters);
        iterator.setEmitUncoveredLoci(true);
        iterator.setMappingQualityScoreCutoff(0); // Handled separately because we want to count bases
        iterator.setQualityScoreCutoff(0);        // Handled separately because we want to count bases
        iterator.setIncludeNonPfReads(false);

        final int max = COVERAGE_CAP;
        final long[] HistogramArray = new long[max + 1];
        final boolean usingStopAfter = STOP_AFTER > 0;
        final long stopAfter = STOP_AFTER-1;
        long counter = 0;

        long basesExcludedByBaseq   = 0;
        long basesExcludedByOverlap = 0;
        long basesExcludedByCapping = 0;

        // Loop through all the loci
        while (iterator.hasNext()) {
            final SamLocusIterator.LocusInfo info = iterator.next();

            // Check that the reference is not N
            final ReferenceSequence ref = refWalker.get(info.getSequenceIndex());
            final byte base = ref.getBases()[info.getPosition()-1];
            if (base == 'N') continue;

            // Figure out the coverage while not counting overlapping reads twice, and excluding various things
            final HashSet<String> readNames = new HashSet<String>(info.getRecordAndPositions().size());
            for (final SamLocusIterator.RecordAndOffset recs : info.getRecordAndPositions()) {
                if (recs.getBaseQuality() < MINIMUM_BASE_QUALITY)                   { ++basesExcludedByBaseq;   continue; }
                if (!readNames.add(recs.getRecord().getReadName()))                 { ++basesExcludedByOverlap; continue; }
            }

            final int depth = Math.min(readNames.size(), max);
            if (depth < readNames.size()) basesExcludedByCapping += readNames.size() - max;
            HistogramArray[depth]++;

            // Record progress and perhaps stop
            progress.record(info.getSequenceName(), info.getPosition());
            if (usingStopAfter && ++counter > stopAfter) break;
        }

        // Construct and write the outputs
        final Histogram<Integer> histo = new Histogram<Integer>("coverage", "count");
View Full Code Here

        // Write the records to the output file in specified sorted order,
        header.setSortOrder(this.sortOrder);
        final boolean presorted = this.sortOrder == SortOrder.coordinate;
        final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, presorted, this.targetBamFile);
      writer.setProgressLogger(
          new ProgressLogger(log, (int) 1e7, "Wrote", "records from a sorting collection"));
        final ProgressLogger finalProgress = new ProgressLogger(log, 10000000, "Written in coordinate order to output", "records");

        for (final SAMRecord rec : sorted) {
            if (!rec.getReadUnmappedFlag()) {
                if (refSeq != null) {
                    final byte[] referenceBases = refSeq.get(sequenceDictionary.getSequenceIndex(rec.getReferenceName())).getBases();
                    rec.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(rec, referenceBases, 0, bisulfiteSequence));

                    if (rec.getBaseQualities() != SAMRecord.NULL_QUALS) {
                        rec.setAttribute(SAMTag.UQ.name(), SequenceUtil.sumQualitiesOfMismatches(rec, referenceBases, 0, bisulfiteSequence));
                    }
                }
            }
            writer.addAlignment(rec);
            finalProgress.record(rec);
        }
        writer.close();
        sorted.cleanup();

        log.info("Wrote " + aligned + " alignment records and " + (alignedReadsOnly ? 0 : unmapped) + " unmapped reads.");
View Full Code Here

        }
        else {
            sorter = null;
        }

        final ProgressLogger progress = new ProgressLogger(log, 1000000, "Reverted");
        for (final SAMRecord rec : in) {
            // Weed out non-primary and supplemental read as we don't want duplicates in the reverted file!
            if (rec.isSecondaryOrSupplementary()) continue;

            // Actually to the reverting of the remaining records
            revertSamRecord(rec);

            if (sanitizing) sorter.add(rec);
            else out.addAlignment(rec);
            progress.record(rec);
        }

        ////////////////////////////////////////////////////////////////////////////
        // Now if we're sanitizing, clean up the records and write them to the output
        ////////////////////////////////////////////////////////////////////////////
        if (!sanitizing) {
            out.close();
        }
        else {

            long total = 0, discarded = 0;
            final PeekableIterator<SAMRecord> iterator = new PeekableIterator<SAMRecord>(sorter.iterator());
            final Map<SAMReadGroupRecord, FastqQualityFormat> readGroupToFormat = new HashMap<SAMReadGroupRecord, FastqQualityFormat>();

            // Figure out the quality score encoding scheme for each read group.
            for (final SAMReadGroupRecord rg : inHeader.getReadGroups()) {
                final SamReader reader =  SamReaderFactory.makeDefault().validationStringency(VALIDATION_STRINGENCY).open(INPUT);
                final SamRecordFilter filter = new SamRecordFilter() {
                    public boolean filterOut(final SAMRecord rec) {
                        return !rec.getReadGroup().getId().equals(rg.getId());
                    }
                    public boolean filterOut(final SAMRecord first, final SAMRecord second) {
                        throw new UnsupportedOperationException();
                    }
                };
                readGroupToFormat.put(rg, QualityEncodingDetector.detect(QualityEncodingDetector.DEFAULT_MAX_RECORDS_TO_ITERATE, new FilteringIterator(reader.iterator(), filter), RESTORE_ORIGINAL_QUALITIES));
                CloserUtil.close(reader);
            }
            for(final SAMReadGroupRecord r : readGroupToFormat.keySet()) {
                log.info("Detected quality format for " + r.getReadGroupId() + ": " + readGroupToFormat.get(r));
            }
            if (readGroupToFormat.values().contains(FastqQualityFormat.Solexa)) {
                log.error("No quality score encoding conversion implemented for " + FastqQualityFormat.Solexa);
                return -1;
            }


            final ProgressLogger sanitizerProgress = new ProgressLogger(log, 1000000, "Sanitized");

            readNameLoop: while (iterator.hasNext()) {
                final List<SAMRecord> recs = fetchByReadName(iterator);
                total += recs.size();

                // Check that all the reads have bases and qualities of the same length
                for (final SAMRecord rec : recs) {
                    if (rec.getReadBases().length != rec.getBaseQualities().length) {
                        log.debug("Discarding " + recs.size() + " reads with name " + rec.getReadName() + " for mismatching bases and quals length.");
                        discarded += recs.size();
                        continue readNameLoop;
                    }
                }

                // Check that if the first read is marked as unpaired that there is in fact only one read
                if (!recs.get(0).getReadPairedFlag() && recs.size() > 1) {
                    log.debug("Discarding " + recs.size() + " reads with name " + recs.get(0).getReadName() + " because they claim to be unpaired.");
                    discarded += recs.size();
                    continue readNameLoop;
                }

                // Check that if we have paired reads there is exactly one first of pair and one second of pair
                if (recs.get(0).getReadPairedFlag()) {
                    int firsts=0, seconds=0, unpaired=0;
                    for (final SAMRecord rec : recs) {
                        if (!rec.getReadPairedFlag())  ++unpaired;
                        if (rec.getFirstOfPairFlag())  ++firsts;
                        if (rec.getSecondOfPairFlag()) ++seconds;
                    }

                    if (unpaired > 0 || firsts != 1 || seconds != 1) {
                        log.debug("Discarding " + recs.size() + " reads with name " + recs.get(0).getReadName() + " because pairing information in corrupt.");
                        discarded += recs.size();
                        continue readNameLoop;
                    }
                }

                // If we've made it this far spit the records into the output!
                for (final SAMRecord rec : recs) {
                    // The only valid quality score encoding scheme is standard; if it's not standard, change it.
                    final FastqQualityFormat recordFormat = readGroupToFormat.get(rec.getReadGroup());
                    if (!recordFormat.equals(FastqQualityFormat.Standard)) {
                        final byte quals[] = rec.getBaseQualities();
                        for (int i = 0; i < quals.length; i++) {
                            quals[i] -= SolexaQualityConverter.ILLUMINA_TO_PHRED_SUBTRAHEND;
                        }
                        rec.setBaseQualities(quals);
                    }
                    out.addAlignment(rec);
                    sanitizerProgress.record(rec);
                }
            }

            out.close();
View Full Code Here

            throw new PicardException("Sort orders of INPUT (" + recordReader.getFileHeader().getSortOrder().name() +
            ") and HEADER (" + replacementHeader.getSortOrder().name() + ") do not agree.");
        }
        final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(replacementHeader, true, OUTPUT);

        final ProgressLogger progress = new ProgressLogger(Log.getInstance(ReplaceSamHeader.class));
        for (final SAMRecord rec : recordReader) {
            rec.setHeader(replacementHeader);
            writer.addAlignment(rec);
            progress.record(rec);
        }
        writer.close();
        recordReader.close();
    }
View Full Code Here

TOP

Related Classes of htsjdk.samtools.util.ProgressLogger

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.