Package org.broadinstitute.gatk.utils

Examples of org.broadinstitute.gatk.utils.GenomeLoc


            sumOfSquares = sumOfSquares.add(bigSize.pow(2));
            sum = sum.add(bigSize);
            numberOfShards++;

            if(numberOfShards % 1000 == 0) {
                GenomeLoc boundingRegion = getBoundingRegion(filePointer,genomeLocParser);
                logger.info(String.format("PROGRESS: Calculating mean and variance: %s\t%d\t%d\t%d",boundingRegion.getContig(),boundingRegion.getStart(),boundingRegion.getStop(),size));
            }

        }

        // Print out the stddev: (sum(x^2) - (1/N)*sum(x)^2)/N
        long mean = sum.divide(BigInteger.valueOf(numberOfShards)).longValue();
        long stddev = (long)(Math.sqrt(sumOfSquares.subtract(sum.pow(2).divide(BigInteger.valueOf(numberOfShards))).divide(BigInteger.valueOf(numberOfShards)).doubleValue()));
        logger.info(String.format("Number of shards: %d; mean uncompressed size = %d; stddev uncompressed size  = %d%n",numberOfShards,mean,stddev));

        // Crank through the shards again, this time reporting on the shards significantly larger than the mean.
        long threshold = mean + stddev*5;
        logger.warn(String.format("PROGRESS: Searching for large shards: Contig\tRegion.Start\tRegion.Stop\tSize"));
        out.printf("Contig\tRegion.Start\tRegion.Stop\tSize%n");

        sharder =  IntervalSharder.shardOverIntervals(dataSource,intervalSortedSet,IntervalMergingRule.ALL);
        while(sharder.hasNext()) {
            FilePointer filePointer = sharder.next();

            // Bounding region.
            GenomeLoc boundingRegion = getBoundingRegion(filePointer,genomeLocParser);

            // Size of the file pointer.
            final long size = filePointer.size();           

            numberOfShards++;

            if(filePointer.size() <= threshold) {
                if(numberOfShards % 1000 == 0)
                    logger.info(String.format("PROGRESS: Searching for large shards: %s\t%d\t%d\t%d",boundingRegion.getContig(),boundingRegion.getStart(),boundingRegion.getStop(),size));
                continue;
            }

            out.printf("%s\t%d\t%d\t%d%n",boundingRegion.getContig(),boundingRegion.getStart(),boundingRegion.getStop(),size);
        }

        return 0;
    }
View Full Code Here


     * @param contig the contig we are about to process
     */
    protected void loadPresetRegionsForContigToWorkQueue(final String contig) {
        if ( ! walkerHasPresetRegions ) throw new IllegalStateException("only appropriate to call when walker has preset regions");

        final GenomeLoc contigSpan = engine.getGenomeLocParser().createOverEntireContig(contig);
        for ( final GenomeLoc loc : this.walker.getPresetActiveRegions().getOverlapping(contigSpan) ) {
            workQueue.add(new ActiveRegion(loc, null, true, engine.getGenomeLocParser(), getActiveRegionExtension()));
        }
    }
View Full Code Here

     *
     * @param read the last read we've seen during the traversal
     */
    @Requires({"read != null"})
    protected void rememberLastReadLocation(final GATKSAMRecord read) {
        final GenomeLoc currentLocation = engine.getGenomeLocParser().createGenomeLoc(read);
        if ( spanOfLastReadSeen == null )
            spanOfLastReadSeen = currentLocation;
        else {
            if ( currentLocation.isBefore(spanOfLastReadSeen) )
                throw new IllegalStateException("Updating last read seen in the traversal with read " + read + " with span " + currentLocation + " but this occurs before the previously seen read " + spanOfLastReadSeen);
            spanOfLastReadSeen = currentLocation;
        }
    }
View Full Code Here

                                                  final ActiveRegionWalker<M, T> walker,
                                                  final IntervalReferenceOrderedView referenceOrderedDataView) {
        final List<GATKSAMRecord> stillLive = new LinkedList<>();
        for ( final GATKSAMRecord read : myReads.popCurrentReads() ) {
            boolean killed = false;
            final GenomeLoc readLoc = this.engine.getGenomeLocParser().createGenomeLoc( read );

            if( activeRegion.getLocation().overlapsP( readLoc ) ) {
                activeRegion.add(read);

                if ( ! walker.wantsNonPrimaryReads() ) {
                    killed = true;
                }
            } else if( walker.wantsExtendedReads() && activeRegion.getExtendedLoc().overlapsP( readLoc )) {
                activeRegion.add( read );
            }

            // if the read hasn't already been killed, check if it cannot occur in any more active regions, and maybe kill it
            if ( ! killed && readCannotOccurInAnyMoreActiveRegions(read, activeRegion) ) {
                killed = true;
            }

            // keep track of all of the still live active regions
            if ( ! killed ) stillLive.add(read);
        }
        myReads.addAll(stillLive);

        if ( logger.isDebugEnabled() ) {
            logger.debug(">> Map call with " + activeRegion.getReads().size() + " " + (activeRegion.isActive() ? "active" : "inactive") + " reads @ " + activeRegion.getLocation() + " with full extent: " + activeRegion.getReadSpanLoc());
        }

        if ( LOG_READ_CARRYING )
            logger.info(String.format("Processing region %20s span=%3d active?=%5b with %4d reads.  Overall max reads carried is %s",
                    activeRegion.getLocation(), activeRegion.getLocation().size(), activeRegion.isActive(), activeRegion.size(), maxReadsInMemory));

        // prepare the RefMetaDataTracker information
        final GenomeLoc loc = activeRegion.getLocation();
        // get all of the RODs that cover the active region (without extension)
        final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataForInterval(loc);
        // trim away all of the features that occurred before this location, as we will not need them in the future
        referenceOrderedDataView.trimCurrentFeaturesToLoc(loc);
View Full Code Here

            // the rodSpan covers all of the bases in the activity profile, including all of the bases
            // through the current window interval.  This is because we may issue a query to get data for an
            // active region spanning before the current interval as far back as the start of the current profile,
            // if we have pending work to do that finalizes in this interval.
            final GenomeLoc rodSpan = activityProfile.getSpan() == null ? currentWindow : activityProfile.getSpan().endpointSpan(currentWindow);
            if ( ! dataProvider.getShard().getLocation().containsP(rodSpan) ) throw new IllegalStateException("Rod span " + rodSpan + " isn't contained within the data shard " + dataProvider.getShard().getLocation() + ", meaning we wouldn't get all of the data we need");
            referenceOrderedDataView = new IntervalReferenceOrderedView( dataProvider, rodSpan );

            // We keep processing while the next reference location is within the interval
            locOfLastReadAtTraversalStart = spanOfLastSeenRead();
View Full Code Here

                return false;
            else {

                while( locusView.hasNext() ) {
                    final AlignmentContext locus = locusView.next();
                    final GenomeLoc location = locus.getLocation();

                    rememberLastLocusLocation(location);

                    // get all of the new reads that appear in the current pileup, and them to our list of reads
                    // provided we haven't seen them before
                    final Collection<GATKSAMRecord> reads = locusView.getLIBS().transferReadsFromAllPreviousPileups();
                    for( final GATKSAMRecord read : reads ) {
                        // note that ActiveRegionShards span entire contigs, so this check is in some
                        // sense no longer necessary, as any read that appeared in the last shard would now
                        // by definition be on a different contig.  However, the logic here doesn't hurt anything
                        // and makes us robust should we decided to provide shards that don't fully span
                        // contigs at some point in the future
                        if ( ! appearedInLastShard(locOfLastReadAtTraversalStart, read) ) {
                            rememberLastReadLocation(read);
                            myReads.add(read);
                        }
                    }

                    // skip this location -- it's not part of our engine intervals
                    if ( outsideEngineIntervals(location) )
                        continue;

                    // we've move across some interval boundary, restart profile
                    final boolean flushProfile = ! activityProfile.isEmpty()
                            && ( activityProfile.getContigIndex() != location.getContigIndex()
                            || location.getStart() != activityProfile.getStop() + 1);
                    final List<MapData> newActiveRegions = prepActiveRegionsForProcessing(walker, flushProfile, false, referenceOrderedDataView);

                    dataProvider.getShard().getReadMetrics().incrementNumIterations();

                    // create reference context. Note that if we have a pileup of "extended events", the context will
View Full Code Here

        if ( WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA && ! walker.isDone() ) {
            // only do this if the walker isn't done!
            final RodLocusView rodLocusView = (RodLocusView)locusView;
            final long nSkipped = rodLocusView.getLastSkippedBases();
            if ( nSkipped > 0 ) {
                final GenomeLoc site = rodLocusView.getLocOneBeyondShard();
                final AlignmentContext ac = new AlignmentContext(site, new ReadBackedPileupImpl(site), nSkipped);
                final M x = walker.map(null, null, ac);
                sum = walker.reduce(x, sum);
            }
        }
View Full Code Here

        }

        @Override
        public MapData next() {
            final AlignmentContext locus = locusView.next();
            final GenomeLoc location = locus.getLocation();

            //logger.info("Pulling data from MapDataIterator at " + location);

            // create reference context. Note that if we have a pileup of "extended events", the context will
            // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
View Full Code Here

             if ( r == null ) {
                 it.next();
                 continue;
             }

             GenomeLoc currentContig = parser.createOverEntireContig(curr_contig);
             GenomeLoc thatContig = r.getLocation();

             if ( currentContig.isPast(thatContig) )
                 throw new UserException("LocationAwareSeekableRODIterator: contig " +r.getLocation().getContig() +
                         " occurs out of order in track " + r.getName() );
             if ( currentContig.isBefore(thatContig) ) break; // next record is on a higher contig, we do not need it yet...
View Full Code Here

        while ( it.hasNext() ) {
            GATKFeature r = it.next();
            if ( r == null ) continue;

            GenomeLoc currentContig = parser.createOverEntireContig(curr_contig);
            GenomeLoc thatContig = r.getLocation();

            if ( currentContig.isPast(thatContig) ) continue; // did not reach requested contig yet
            if ( currentContig.isBefore(thatContig) ) {
                it.pushback(r); // next record is on the higher contig, we do not need it yet...
                break;
View Full Code Here

TOP

Related Classes of org.broadinstitute.gatk.utils.GenomeLoc

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.