Examples of org.broadinstitute.gatk.utils.GenomeLoc

org.broadinstitute.gatk.utils.GenomeLoc
Created by IntelliJ IDEA. User: mdepristo Date: Mar 2, 2009 Time: 8:50:11 AM Genome location representation. It is *** 1 *** based closed. Note that GenomeLocs start and stop values can be any positive or negative number, by design. Bound validation is a feature of the GenomeLocParser, and not a fundamental constraint of the GenomeLoc

            sumOfSquares = sumOfSquares.add(bigSize.pow(2));
            sum = sum.add(bigSize);
            numberOfShards++;


            if(numberOfShards % 1000 == 0) {
                GenomeLoc boundingRegion = getBoundingRegion(filePointer,genomeLocParser);
                logger.info(String.format("PROGRESS: Calculating mean and variance: %s\t%d\t%d\t%d",boundingRegion.getContig(),boundingRegion.getStart(),boundingRegion.getStop(),size));
            }


        }


        // Print out the stddev: (sum(x^2) - (1/N)*sum(x)^2)/N
        long mean = sum.divide(BigInteger.valueOf(numberOfShards)).longValue();
        long stddev = (long)(Math.sqrt(sumOfSquares.subtract(sum.pow(2).divide(BigInteger.valueOf(numberOfShards))).divide(BigInteger.valueOf(numberOfShards)).doubleValue()));
        logger.info(String.format("Number of shards: %d; mean uncompressed size = %d; stddev uncompressed size  = %d%n",numberOfShards,mean,stddev));


        // Crank through the shards again, this time reporting on the shards significantly larger than the mean.
        long threshold = mean + stddev*5;
        logger.warn(String.format("PROGRESS: Searching for large shards: Contig\tRegion.Start\tRegion.Stop\tSize"));
        out.printf("Contig\tRegion.Start\tRegion.Stop\tSize%n");


        sharder =  IntervalSharder.shardOverIntervals(dataSource,intervalSortedSet,IntervalMergingRule.ALL);
        while(sharder.hasNext()) {
            FilePointer filePointer = sharder.next();


            // Bounding region.
            GenomeLoc boundingRegion = getBoundingRegion(filePointer,genomeLocParser);


            // Size of the file pointer.
            final long size = filePointer.size();            


            numberOfShards++;


            if(filePointer.size() <= threshold) {
                if(numberOfShards % 1000 == 0) 
                    logger.info(String.format("PROGRESS: Searching for large shards: %s\t%d\t%d\t%d",boundingRegion.getContig(),boundingRegion.getStart(),boundingRegion.getStop(),size));
                continue;
            }


            out.printf("%s\t%d\t%d\t%d%n",boundingRegion.getContig(),boundingRegion.getStart(),boundingRegion.getStop(),size);
        }


        return 0;
    }

View Full Code Here

     * @param contig the contig we are about to process
     */
    protected void loadPresetRegionsForContigToWorkQueue(final String contig) {
        if ( ! walkerHasPresetRegions ) throw new IllegalStateException("only appropriate to call when walker has preset regions");


        final GenomeLoc contigSpan = engine.getGenomeLocParser().createOverEntireContig(contig);
        for ( final GenomeLoc loc : this.walker.getPresetActiveRegions().getOverlapping(contigSpan) ) {
            workQueue.add(new ActiveRegion(loc, null, true, engine.getGenomeLocParser(), getActiveRegionExtension()));
        }
    }

View Full Code Here

     *
     * @param read the last read we've seen during the traversal
     */
    @Requires({"read != null"})
    protected void rememberLastReadLocation(final GATKSAMRecord read) {
        final GenomeLoc currentLocation = engine.getGenomeLocParser().createGenomeLoc(read);
        if ( spanOfLastReadSeen == null )
            spanOfLastReadSeen = currentLocation;
        else {
            if ( currentLocation.isBefore(spanOfLastReadSeen) )
                throw new IllegalStateException("Updating last read seen in the traversal with read " + read + " with span " + currentLocation + " but this occurs before the previously seen read " + spanOfLastReadSeen);
            spanOfLastReadSeen = currentLocation;
        }
    }

View Full Code Here

                                                  final ActiveRegionWalker<M, T> walker,
                                                  final IntervalReferenceOrderedView referenceOrderedDataView) {
        final List<GATKSAMRecord> stillLive = new LinkedList<>();
        for ( final GATKSAMRecord read : myReads.popCurrentReads() ) {
            boolean killed = false;
            final GenomeLoc readLoc = this.engine.getGenomeLocParser().createGenomeLoc( read );


            if( activeRegion.getLocation().overlapsP( readLoc ) ) {
                activeRegion.add(read);


                if ( ! walker.wantsNonPrimaryReads() ) {
                    killed = true;
                }
            } else if( walker.wantsExtendedReads() && activeRegion.getExtendedLoc().overlapsP( readLoc )) {
                activeRegion.add( read );
            }


            // if the read hasn't already been killed, check if it cannot occur in any more active regions, and maybe kill it
            if ( ! killed && readCannotOccurInAnyMoreActiveRegions(read, activeRegion) ) {
                killed = true;
            }


            // keep track of all of the still live active regions
            if ( ! killed ) stillLive.add(read);
        }
        myReads.addAll(stillLive);


        if ( logger.isDebugEnabled() ) {
            logger.debug(">> Map call with " + activeRegion.getReads().size() + " " + (activeRegion.isActive() ? "active" : "inactive") + " reads @ " + activeRegion.getLocation() + " with full extent: " + activeRegion.getReadSpanLoc());
        }


        if ( LOG_READ_CARRYING )
            logger.info(String.format("Processing region %20s span=%3d active?=%5b with %4d reads.  Overall max reads carried is %s",
                    activeRegion.getLocation(), activeRegion.getLocation().size(), activeRegion.isActive(), activeRegion.size(), maxReadsInMemory));


        // prepare the RefMetaDataTracker information
        final GenomeLoc loc = activeRegion.getLocation();
        // get all of the RODs that cover the active region (without extension)
        final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataForInterval(loc);
        // trim away all of the features that occurred before this location, as we will not need them in the future
        referenceOrderedDataView.trimCurrentFeaturesToLoc(loc);

View Full Code Here


            // the rodSpan covers all of the bases in the activity profile, including all of the bases
            // through the current window interval.  This is because we may issue a query to get data for an
            // active region spanning before the current interval as far back as the start of the current profile,
            // if we have pending work to do that finalizes in this interval.
            final GenomeLoc rodSpan = activityProfile.getSpan() == null ? currentWindow : activityProfile.getSpan().endpointSpan(currentWindow);
            if ( ! dataProvider.getShard().getLocation().containsP(rodSpan) ) throw new IllegalStateException("Rod span " + rodSpan + " isn't contained within the data shard " + dataProvider.getShard().getLocation() + ", meaning we wouldn't get all of the data we need");
            referenceOrderedDataView = new IntervalReferenceOrderedView( dataProvider, rodSpan );


            // We keep processing while the next reference location is within the interval
            locOfLastReadAtTraversalStart = spanOfLastSeenRead();

View Full Code Here

                return false;
            else {


                while( locusView.hasNext() ) {
                    final AlignmentContext locus = locusView.next();
                    final GenomeLoc location = locus.getLocation();


                    rememberLastLocusLocation(location);


                    // get all of the new reads that appear in the current pileup, and them to our list of reads
                    // provided we haven't seen them before
                    final Collection<GATKSAMRecord> reads = locusView.getLIBS().transferReadsFromAllPreviousPileups();
                    for( final GATKSAMRecord read : reads ) {
                        // note that ActiveRegionShards span entire contigs, so this check is in some
                        // sense no longer necessary, as any read that appeared in the last shard would now
                        // by definition be on a different contig.  However, the logic here doesn't hurt anything
                        // and makes us robust should we decided to provide shards that don't fully span
                        // contigs at some point in the future
                        if ( ! appearedInLastShard(locOfLastReadAtTraversalStart, read) ) {
                            rememberLastReadLocation(read);
                            myReads.add(read);
                        }
                    }


                    // skip this location -- it's not part of our engine intervals
                    if ( outsideEngineIntervals(location) )
                        continue;


                    // we've move across some interval boundary, restart profile
                    final boolean flushProfile = ! activityProfile.isEmpty()
                            && ( activityProfile.getContigIndex() != location.getContigIndex()
                            || location.getStart() != activityProfile.getStop() + 1);
                    final List<MapData> newActiveRegions = prepActiveRegionsForProcessing(walker, flushProfile, false, referenceOrderedDataView);


                    dataProvider.getShard().getReadMetrics().incrementNumIterations();


                    // create reference context. Note that if we have a pileup of "extended events", the context will

View Full Code Here

        if ( WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA && ! walker.isDone() ) {
            // only do this if the walker isn't done!
            final RodLocusView rodLocusView = (RodLocusView)locusView;
            final long nSkipped = rodLocusView.getLastSkippedBases();
            if ( nSkipped > 0 ) {
                final GenomeLoc site = rodLocusView.getLocOneBeyondShard();
                final AlignmentContext ac = new AlignmentContext(site, new ReadBackedPileupImpl(site), nSkipped);
                final M x = walker.map(null, null, ac);
                sum = walker.reduce(x, sum);
            }
        }

View Full Code Here

        }


        @Override
        public MapData next() {
            final AlignmentContext locus = locusView.next();
            final GenomeLoc location = locus.getLocation();


            //logger.info("Pulling data from MapDataIterator at " + location);


            // create reference context. Note that if we have a pileup of "extended events", the context will
            // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).

View Full Code Here

             if ( r == null ) {
                 it.next();
                 continue;
             }


             GenomeLoc currentContig = parser.createOverEntireContig(curr_contig);
             GenomeLoc thatContig = r.getLocation();


             if ( currentContig.isPast(thatContig) )
                 throw new UserException("LocationAwareSeekableRODIterator: contig " +r.getLocation().getContig() +
                         " occurs out of order in track " + r.getName() );
             if ( currentContig.isBefore(thatContig) ) break; // next record is on a higher contig, we do not need it yet...

View Full Code Here


        while ( it.hasNext() ) {
            GATKFeature r = it.next();
            if ( r == null ) continue;


            GenomeLoc currentContig = parser.createOverEntireContig(curr_contig);
            GenomeLoc thatContig = r.getLocation();


            if ( currentContig.isPast(thatContig) ) continue; // did not reach requested contig yet
            if ( currentContig.isBefore(thatContig) ) {
                it.pushback(r); // next record is on the higher contig, we do not need it yet...
                break;

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.broadinstitute.gatk.utils.GenomeLoc

org.broad.igv.plugin.mongovariant.VariantReviewSource

org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils

org.broadinstitute.gatk.engine.datasources.providers.AllLocusViewUnitTest

org.broadinstitute.gatk.engine.datasources.providers.CoveredLocusViewUnitTest

org.broadinstitute.gatk.engine.datasources.providers.IntervalOverlappingRODsFromStream

org.broadinstitute.gatk.engine.datasources.providers.IntervalReferenceOrderedViewUnitTest

org.broadinstitute.gatk.engine.datasources.providers.IntervalReferenceOrderedViewUnitTest$TribbleIteratorFromCollection

org.broadinstitute.gatk.engine.datasources.providers.LocusReferenceView

org.broadinstitute.gatk.engine.datasources.providers.LocusReferenceViewUnitTest

org.broadinstitute.gatk.engine.datasources.providers.LocusViewTemplate

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.