Package org.broadinstitute.gatk.utils.sam

Examples of org.broadinstitute.gatk.utils.sam.GATKSAMRecord


        }

        final int[] hapReadsNotInReference = new int[haplotypes.size()];

        for (int i = 0; i < readCount; i++) {
            final GATKSAMRecord r = as.readList().get(i);

            final int hapNumber = i % haplotypes.size();
            final int offset = i % (haplotypes.get(hapNumber).length() - readLength);
            Assert.assertEquals(r.getReadString(),haplotypes.get(hapNumber).getBaseString().substring(offset,offset+readLength));
            if (as.getReference().indexOf(r.getReadString()) == -1) {
                hapReadsNotInReference[hapNumber]++;
            }
        }

        Assert.assertEquals(hapReadsNotInReference[0],0);
View Full Code Here


            correctedReads.addAll(reads);
        }
        else {
            computeKmerCorrectionMap();
            for (final GATKSAMRecord read: reads) {
                final GATKSAMRecord correctedRead = correctRead(read);
                if (trimLowQualityBases)
                    correctedReads.add(ReadClipper.hardClipLowQualEnds(correctedRead, minTailQuality));
                else
                    correctedReads.add(correctedRead);
            }
View Full Code Here

                inputRead.setReadBases(correctedBases);
                inputRead.setBaseQualities(correctedQuals);
                return inputRead;
            }
            else {
                GATKSAMRecord correctedRead = new GATKSAMRecord(inputRead);

                //  do the actual correction
                // todo - do we need to clone anything else from read?
                correctedRead.setBaseQualities(inputRead.getBaseQualities());
                correctedRead.setIsStrandless(inputRead.isStrandless());
                correctedRead.setReadBases(inputRead.getReadBases());
                correctedRead.setReadString(inputRead.getReadString());
                correctedRead.setReadGroup(inputRead.getReadGroup());
                return correctedRead;
            }
        }
        else {
            readErrorCorrectionStats.numReadsUncorrected++;
View Full Code Here

            return;

        if ( isLeftOverhang(read.loc, splice.loc) ) {
            final int overhang = splice.loc.getStop() - read.loc.getStart() + 1;
            if ( overhangingBasesMismatch(read.read.getReadBases(), 0, splice.reference, splice.reference.length - overhang, overhang) ) {
                final GATKSAMRecord clippedRead = ReadClipper.hardClipByReadCoordinates(read.read, 0, overhang - 1);
                read.setRead(clippedRead);
            }
        }
        else if ( isRightOverhang(read.loc, splice.loc) ) {
            final int overhang = read.loc.getStop() - splice.loc.getStart() + 1;
            if ( overhangingBasesMismatch(read.read.getReadBases(), read.read.getReadLength() - overhang, splice.reference, 0, overhang) ) {
                final GATKSAMRecord clippedRead = ReadClipper.hardClipByReadCoordinates(read.read, read.read.getReadLength() - overhang, read.read.getReadLength() - 1);
                read.setRead(clippedRead);
            }
        }
    }
View Full Code Here

                if (DEBUG) {
                    System.out.format("Read Name:%s, aln start:%d aln stop:%d orig cigar:%s\n",p.getRead().getReadName(), p.getRead().getAlignmentStart(), p.getRead().getAlignmentEnd(), p.getRead().getCigarString());
                }

                GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());

                // if the read extends beyond the downstream (right) end of the reference window, clip it
                if ( mustClipDownstream(read, refWindowStop) )
                    read = ReadClipper.hardClipByReadCoordinates(read, refWindowStop - read.getSoftStart() + 1, read.getReadLength() - 1);

                // if the read extends beyond the upstream (left) end of the reference window, clip it
                if ( mustClipUpstream(read, refWindowStart) )
                    read = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, refWindowStart);

                if (read.isEmpty())
                    continue;

                // hard-clip low quality ends - this may introduce extra H elements in CIGAR string
                read = ReadClipper.hardClipLowQualEnds(read, (byte) BASE_QUAL_THRESHOLD );

                if (read.isEmpty())
                    continue;

                // get bases of candidate haplotypes that overlap with reads
                final long readStart = read.getSoftStart();
                final long readEnd = read.getSoftEnd();

                // see if we want to use soft clipped bases. Aligners may soft clip all bases at insertions because they don't match,
                // but they're actually consistent with the insertion!
                // Rule: if a read starts in interval [eventStart-eventLength,eventStart+1] and we are at an insertion, we'll use all soft clipped bases at the beginning.
                // Conversely, if a read ends at [eventStart,eventStart+eventLength] we'll use all soft clipped bases in the end of the read.
                final long eventStartPos = ref.getLocus().getStart();

                // compute total number of clipped bases (soft or hard clipped) and only use them if necessary
                final boolean softClips = useSoftClippedBases(read, eventStartPos, eventLength);
                final int numStartSoftClippedBases = softClips ? read.getAlignmentStart()- read.getSoftStart() : 0;
                final int numEndSoftClippedBases = softClips ? read.getSoftEnd()- read.getAlignmentEnd() : 0 ;
                final byte [] unclippedReadBases = read.getReadBases();
                final byte [] unclippedReadQuals = read.getBaseQualities();

                /**
                 * Compute genomic locations that candidate haplotypes will span.
                 * Read start and stop locations (variables readStart and readEnd) are the original unclipped positions from SAMRecord,
                 * adjusted by hard clips from Cigar string and by qual-based soft-clipping performed above.
                 * We will propose haplotypes that overlap the read with some padding.
                 * True read start = readStart + numStartSoftClippedBases - ReadUtils.getFirstInsertionOffset(read)
                 * Last term is because if a read starts with an insertion then these bases are not accounted for in readStart.
                 * trailingBases is a padding constant(=3) and we additionally add abs(eventLength) to both sides of read to be able to
                 * differentiate context between two haplotypes
                 */
                final int absEventLength = Math.abs(eventLength);
                long startLocationInRefForHaplotypes = Math.max(readStart + numStartSoftClippedBases - trailingBases - ReadUtils.getFirstInsertionOffset(read) - absEventLength, 0);
                long stopLocationInRefForHaplotypes = readEnd - numEndSoftClippedBases + trailingBases + ReadUtils.getLastInsertionOffset(read) + absEventLength;

                if (DEBUG)
                    System.out.format("orig Start:%d orig stop: %d\n", startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes);

                int readLength = read.getReadLength()-numStartSoftClippedBases-numEndSoftClippedBases;

                if (startLocationInRefForHaplotypes < ref.getWindow().getStart()) {
                    startLocationInRefForHaplotypes = ref.getWindow().getStart();                                       // read starts before haplotype: read will have to be cut numStartSoftClippedBases += ref.getWindow().getStart() - startLocationInRefForHaplotypes;
                }
                else if (startLocationInRefForHaplotypes > ref.getWindow().getStop()) {
                    startLocationInRefForHaplotypes = ref.getWindow().getStop();                                        // read starts after haplotype: read will have to be clipped completely;
                }

                // candidate haplotype cannot go beyond reference context
                if (stopLocationInRefForHaplotypes > ref.getWindow().getStop()) {
                    stopLocationInRefForHaplotypes = ref.getWindow().getStop();                                         // check also if end of read will go beyond reference context
                }

                if (stopLocationInRefForHaplotypes <= startLocationInRefForHaplotypes + readLength) {
                    stopLocationInRefForHaplotypes = startLocationInRefForHaplotypes + readLength-1;                    // if there's an insertion in the read, the read stop position will be less than start + read legnth, but we want to compute likelihoods in the whole region that a read might overlap
                }

                // ok, we now figured out the total number of clipped bases on both ends.
                // Figure out where we want to place the haplotype to score read against

                if (DEBUG)
                    System.out.format("numStartSoftClippedBases: %d numEndSoftClippedBases: %d WinStart:%d WinStop:%d start: %d stop: %d readLength: %d\n",
                            numStartSoftClippedBases, numEndSoftClippedBases, ref.getWindow().getStart(), ref.getWindow().getStop(), startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes, read.getReadLength());

               // LinkedHashMap<Allele,Double> readEl = new LinkedHashMap<Allele,Double>();

                /**
                 * Check if we'll end up with an empty read once all clipping is done
                 */
                if (numStartSoftClippedBases + numEndSoftClippedBases >= unclippedReadBases.length) {
                    int j=0;
                    for (Allele a: haplotypeMap.keySet()) {
                        perReadAlleleLikelihoodMap.add(p,a,0.0);
                        readLikelihoods[readIdx][j++] = 0.0;
                    }
                }
                else {
                    final int endOfCopy = unclippedReadBases.length - numEndSoftClippedBases;
                    final byte[] readBases = Arrays.copyOfRange(unclippedReadBases, numStartSoftClippedBases, endOfCopy);
                    final byte[] readQuals = Arrays.copyOfRange(unclippedReadQuals, numStartSoftClippedBases, endOfCopy);

                    int j=0;

                    final byte[] contextLogGapOpenProbabilities = new byte[readBases.length];
                    final byte[] contextLogGapContinuationProbabilities  = new byte[readBases.length];

                    // get homopolymer length profile for current haplotype
                    final int[] hrunProfile = new int[readBases.length];
                    getContextHomopolymerLength(readBases,hrunProfile);
                    fillGapProbabilities(hrunProfile, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities);

                    // get the base insertion and deletion qualities to use
                    final byte[] baseInsertionQualities, baseDeletionQualities;
                    if ( read.hasBaseIndelQualities() ) {
                        baseInsertionQualities = Arrays.copyOfRange(read.getBaseInsertionQualities(), numStartSoftClippedBases, endOfCopy);
                        baseDeletionQualities = Arrays.copyOfRange(read.getBaseDeletionQualities(), numStartSoftClippedBases, endOfCopy);
                    } else {
                        baseInsertionQualities = contextLogGapOpenProbabilities;
                        baseDeletionQualities = contextLogGapOpenProbabilities;
                    }

                    // Create a new read based on the current one, but with trimmed bases/quals, for use in the HMM
                    final GATKSAMRecord processedRead = GATKSAMRecord.createQualityModifiedRead(read, readBases, readQuals, baseInsertionQualities, baseDeletionQualities);

                    // Pack the shortened read and its associated gap-continuation-penalty array into a map, as required by PairHMM
                    final Map<GATKSAMRecord,byte[]> readGCPArrayMap = Collections.singletonMap(processedRead,contextLogGapContinuationProbabilities);

                    // Create a map of alleles to a new set of haplotypes, whose bases have been trimmed to the appropriate genomic locations
View Full Code Here

        }
    }

    @Override
    public GATKSAMRecord map(final ReferenceContext ref, final GATKSAMRecord read, final RefMetaDataTracker metaDataTracker) {
        GATKSAMRecord workingRead = read;

        for ( final RNAReadTransformer transformer : rnaReadTransformers ) {
            workingRead = transformer.apply(workingRead);                    // TODO: when a read transformer can be called directly from the command line we won't need that mechanism any more
        }
View Full Code Here

     * @param isAmongBestHaplotypes true if among the best haplotypes, false if it was just one possible but not so good
     */
    private void writeHaplotype(final Haplotype haplotype,
                                final GenomeLoc paddedRefLoc,
                                final boolean isAmongBestHaplotypes) {
        final GATKSAMRecord record = new GATKSAMRecord(output.getHeader());
        record.setReadBases(haplotype.getBases());
        record.setAlignmentStart(paddedRefLoc.getStart() + haplotype.getAlignmentStartHapwrtRef());
        record.setBaseQualities(Utils.dupBytes((byte) '!', haplotype.getBases().length));
        record.setCigar(AlignmentUtils.consolidateCigar(haplotype.getCigar()));
        record.setMappingQuality(isAmongBestHaplotypes ? 60 : 0);
        record.setReadName("HC" + uniqueNameCounter++);
        record.setAttribute(AlignmentUtils.HAPLOTYPE_TAG,haplotype.hashCode());
        record.setReadUnmappedFlag(false);
        record.setReferenceIndex(paddedRefLoc.getContigIndex());
        record.setAttribute(SAMTag.RG.toString(), READ_GROUP_ID);
        record.setFlags(16);
        output.add(record);
    }
View Full Code Here

    }

    @Test
    public void realignAtContigBorderTest() {
        final int contigEnd = header.getSequence(0).getSequenceLength();
        final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "goodRead", 0, contigEnd - 1, 2);
        read.setCigarString("2M");
        Assert.assertEquals(IndelRealigner.realignmentProducesBadAlignment(read, contigEnd), false);
        read.setCigarString("1M1D1M");
        Assert.assertEquals(IndelRealigner.realignmentProducesBadAlignment(read, contigEnd), true);
    }
View Full Code Here

    }

    @Test(dataProvider = "ClipUpstreamProvider", enabled = true)
    public void clipUpstreamTest(final int readStart, final int readLength, final int delLength) {

        final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "basicRead", 0, readStart, readLength);
        if ( delLength == 0 )
            read.setCigarString(readLength + "M");
        else
            read.setCigarString((readLength / 2) + "M" + delLength + "D" + (readLength / 2) + "M");

        final boolean result = PairHMMIndelErrorModel.mustClipUpstream(read, refWindowStart);
        Assert.assertEquals(result, read.getSoftStart() < refWindowStart && read.getSoftEnd() > refWindowStart);
    }
View Full Code Here

    }

    @Test(dataProvider = "ClipDownstreamProvider", enabled = true)
    public void clipDownstreamTest(final int readStart, final int readLength, final int delLength) {

        final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "basicRead", 0, readStart, readLength);
        if ( delLength == 0 )
            read.setCigarString(readLength + "M");
        else
            read.setCigarString((readLength / 2) + "M" + delLength + "D" + (readLength / 2) + "M");

        final boolean result = PairHMMIndelErrorModel.mustClipDownstream(read, refWindowEnd);
        Assert.assertEquals(result, read.getSoftStart() < refWindowEnd && read.getSoftStart() + readLength > refWindowEnd);
    }
View Full Code Here

TOP

Related Classes of org.broadinstitute.gatk.utils.sam.GATKSAMRecord

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.