Examples of org.broadinstitute.gatk.utils.haplotype.Haplotype

Package org.broadinstitute.gatk.utils.haplotype

Examples of org.broadinstitute.gatk.utils.haplotype.Haplotype

org.broadinstitute.gatk.utils.haplotype.Haplotype

            finders.add(haplotypeFinder);
            final Iterator<KBestHaplotype> bestHaplotypes = haplotypeFinder.iterator(numBestHaplotypesPerGraph);


            while (bestHaplotypes.hasNext()) {
                final KBestHaplotype kBestHaplotype = bestHaplotypes.next();
                final Haplotype h = kBestHaplotype.haplotype();
                if( !returnHaplotypes.contains(h) ) {
                    final Cigar cigar = CigarUtils.calculateCigar(refHaplotype.getBases(),h.getBases());


                    if ( cigar == null ) {
                        failedCigars++; // couldn't produce a meaningful alignment of haplotype to reference, fail quietly
                        continue;
                    } else if( cigar.isEmpty() ) {
                        throw new IllegalStateException("Smith-Waterman alignment failure. Cigar = " + cigar + " with reference length " + cigar.getReferenceLength() +
                                " but expecting reference length of " + refHaplotype.getCigar().getReferenceLength());
                    } else if ( pathIsTooDivergentFromReference(cigar) || cigar.getReferenceLength() < MIN_HAPLOTYPE_REFERENCE_LENGTH ) {
                        // N cigar elements means that a bubble was too divergent from the reference so skip over this path
                        continue;
                    } else if( cigar.getReferenceLength() != refHaplotype.getCigar().getReferenceLength() ) { // SW failure
                        throw new IllegalStateException("Smith-Waterman alignment failure. Cigar = " + cigar + " with reference length "
                                + cigar.getReferenceLength() + " but expecting reference length of " + refHaplotype.getCigar().getReferenceLength()
                                + " ref = " + refHaplotype + " path " + new String(h.getBases()));
                    }


                    h.setCigar(cigar);
                    h.setAlignmentStartHapwrtRef(activeRegionStart);
                    h.setGenomeLocation(activeRegionWindow);
                    returnHaplotypes.add(h);
                    assemblyResultSet.add(h, assemblyResultByGraph.get(graph));


                    if ( debug )
                        logger.info("Adding haplotype " + h.getCigar() + " from graph with kmer " + graph.getKmerSize());
                }
            }
        }


        // Make sure that the ref haplotype is amongst the return haplotypes and calculate its score as
        // the first returned by any finder.
        if (!returnHaplotypes.contains(refHaplotype)) {
            double refScore = Double.NaN;
            for (final KBestHaplotypeFinder finder : finders) {
                final double candidate = finder.score(refHaplotype);
                if (Double.isNaN(candidate)) continue;
                refScore = candidate;
                break;
            }
            refHaplotype.setScore(refScore);
            returnHaplotypes.add(refHaplotype);
        }


        if (failedCigars != 0)
            logger.debug(String.format("failed to align some haplotypes (%d) back to the reference (loc=%s); these will be ignored.",failedCigars,refLoc.toString()));


        if( debug ) {
            if( returnHaplotypes.size() > 1 ) {
                logger.info("Found " + returnHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against.");
            } else {
                logger.info("Found only the reference haplotype in the assembly graph.");
            }
            for( final Haplotype h : returnHaplotypes ) {
                logger.info( h.toString() );
                logger.info( "> Cigar = " + h.getCigar() + " : " + h.getCigar().getReferenceLength() + " score " + h.getScore() + " ref " + h.isReference());
            }
        }


        return new ArrayList<>(returnHaplotypes);

View Full Code Here


        haplotypes = new ArrayList<>(assemblyResultSet.getHaplotypeList());
        Collections.sort(haplotypes, Haplotype.ALPHANUMERICAL_COMPARATOR);


        // make sure that kmerSize is not bigger than the smallest haplotype. It can well happen when there are cycles and kmerSize inflates.
        final Haplotype referenceHaplotype = assemblyResultSet.getReferenceHaplotype();
        int minHaplotypeLength = referenceHaplotype.length();
        for (final Haplotype h : haplotypes)
            if (minHaplotypeLength > h.length())
                minHaplotypeLength = h.length();


        // Determine the kmerSize to use for the unified haplotype assembly graph

View Full Code Here

     * @param costsEndingByVertex       read costs assorted by their end vertex.
     */
    private void calculatePerReadAlleleLikelihoodMapHaplotypeProcessing(final int haplotypeIndex,
                                                                        final ReadLikelihoods.Matrix<Haplotype> likelihoods,
                                                                        final Map<MultiDeBruijnVertex, Set<ReadSegmentCost>> costsEndingByVertex) {
        final Haplotype haplotype = likelihoods.alleleAt(haplotypeIndex);
        final HaplotypeRoute haplotypeRoute = haplotypeGraph.getHaplotypeRoute(haplotype);
        final Set<MultiDeBruijnVertex> haplotypeVertices = haplotypeRoute.vertexSet();
        final Map<GATKSAMRecord, ReadCost> readCostByRead = new HashMap<>();
        final Set<MultiDeBruijnVertex> visitedVertices = new HashSet<>(haplotypeVertices.size());
        final List<MultiSampleEdge> edgeList = haplotypeRoute.getEdges();

View Full Code Here


        for( final VariantContext compVC : givenHaplotypes ) {
            if (!GATKVariantContextUtils.overlapsRegion(compVC, activeRegionWindow))
                throw new IllegalArgumentException(" some variant provided does not overlap with active region window");
            for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
                final Haplotype insertedRefHaplotype = refHaplotype.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart());
                if( insertedRefHaplotype != null ) { // can be null if the requested allele can't be inserted into the haplotype
                    returnHaplotypes.add(insertedRefHaplotype);
                }
            }
        }

View Full Code Here

        final Collection<ReadLikelihoods<Haplotype>.BestAllele> bestAlleles = originalReadLikelihoods.bestAlleles();
        final Map<GATKSAMRecord,GATKSAMRecord> result = new HashMap<>(bestAlleles.size());


        for (final ReadLikelihoods<Haplotype>.BestAllele bestAllele : bestAlleles) {
            final GATKSAMRecord originalRead = bestAllele.read;
            final Haplotype bestHaplotype = bestAllele.allele;
            final boolean isInformative = bestAllele.isInformative();
            final GATKSAMRecord realignedRead = AlignmentUtils.createReadAlignedToRef(originalRead,bestHaplotype,paddedReferenceLoc.getStart(),isInformative);
            result.put(originalRead,realignedRead);
        }
        return result;

View Full Code Here

        // Create the reference haplotype which is the bases from the reference that make up the active region
        finalizeActiveRegion(activeRegion); // handle overlapping fragments, clip adapter and low qual tails


        final byte[] fullReferenceWithPadding = activeRegion.getActiveRegionReference(referenceReader, REFERENCE_PADDING);
        final GenomeLoc paddedReferenceLoc = getPaddedLoc(activeRegion);
        final Haplotype referenceHaplotype = createReferenceHaplotype(activeRegion, paddedReferenceLoc);


        // Create ReadErrorCorrector object if requested - will be used within assembly engine.
        ReadErrorCorrector readErrorCorrector = null;
        if (errorCorrectReads)
            readErrorCorrector = new ReadErrorCorrector(kmerLengthForReadErrorCorrection, MIN_TAIL_QUALITY_WITH_ERROR_CORRECTION, minObservationsForKmerToBeSolid, SCAC.DEBUG, fullReferenceWithPadding);

View Full Code Here

            if ( needsToBeFinalized )
                finalizeActiveRegion(region);
            filterNonPassingReads(region);


            final GenomeLoc paddedLoc = region.getExtendedLoc();
            final Haplotype refHaplotype = createReferenceHaplotype(region, paddedLoc);
            final List<Haplotype> haplotypes = Collections.singletonList(refHaplotype);
            return referenceConfidenceModel.calculateRefConfidence(refHaplotype, haplotypes,
                    paddedLoc, region, createDummyStratifiedReadMap(refHaplotype, samplesList, region),
                    genotypingEngine.getPloidyModel(), genotypingEngine.getGenotypingModel(), Collections.<VariantContext>emptyList());
        } else

View Full Code Here

            assembler.setPruneFactor(0);
        }


        public void addSequence(final byte[] bases, final boolean isRef) {
            if ( isRef ) {
                refHaplotype = new Haplotype(bases, true);
            } else {
                final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, Utils.dupBytes((byte)30,bases.length), bases.length + "M");
                reads.add(read);
            }
        }

View Full Code Here

                   final Map<String,Civar> civarBySequence = new HashMap<>(haplotypes.size());
                   final Map<String,Haplotype> haplotypeBySequence = new HashMap<>(haplotypes.size());
                   civarByAllele = new HashMap<>(haplotypes.size());
                   final List<Civar> unrolledCivars = dataSet.unrolledCivars();
                   for (int i = 0; i < haplotypes.size(); i++)  {
                       final Haplotype h = haplotypes.get(i);
                       haplotypeBySequence.put(h.getBaseString(),h);
                       civarBySequence.put(h.getBaseString(),unrolledCivars.get(i));
                   }
                   for (final Allele a : loglessLks.getAllelesSet()) {
                       alleleByHaplotype.put(haplotypeBySequence.get(a.getBaseString()),a);
                       civarByAllele.put(a,civarBySequence.get(a.getBaseString()));
                   }

View Full Code Here

    }


    @Test
    public void testAddReferenceHaplotype() {


        final Haplotype ref = new Haplotype("ACGT".getBytes(),true);
        ref.setGenomeLocation(genomeLocParser.createGenomeLoc("chr1",1,ref.length() + 1 ));
        final AssemblyResultSet subject = new AssemblyResultSet();


        Assert.assertTrue(subject.add(ref));
        Assert.assertFalse(subject.add(ref));

View Full Code Here

0 1 2 3 4

TOP

Related Classes of org.broadinstitute.gatk.utils.haplotype.Haplotype

htsjdk.samtools.Cigar

htsjdk.samtools.CigarElement

htsjdk.variant.variantcontext.Allele

org.broadinstitute.gatk.tools.walkers.haplotypecaller.ActiveRegionTestDataSetUnitTest

org.broadinstitute.gatk.tools.walkers.haplotypecaller.AssemblyResultSet

org.broadinstitute.gatk.tools.walkers.haplotypecaller.AssemblyResultSetUnitTest

org.broadinstitute.gatk.tools.walkers.haplotypecaller.GraphBasedLikelihoodCalculationEngineInstance

org.broadinstitute.gatk.tools.walkers.haplotypecaller.graphs.KBestHaplotype

org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCaller

org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCallerGenotypingEngineUnitTest

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.