Package htsjdk.samtools.util

Examples of htsjdk.samtools.util.IntervalList


        IOUtil.assertFileIsWritable(OUTPUT);

        final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE, true);

        // get the intervals
        final IntervalList intervals = segregateReference(refFile, MAX_TO_MERGE);

        log.info(String.format("Found %d intervals in %d loci during %s seconds", intervalProgress.getCount(), locusProgress.getCount(), locusProgress.getElapsedSeconds()));

        /**********************************
         * Now output regions for calling *
         **********************************/

        final IntervalList outputIntervals = new IntervalList(intervals.getHeader().clone());
        log.info(String.format("Collecting requested type of intervals (%s)", OUTPUT_TYPE));

        for (final Interval i : intervals.getIntervals()) {
            if (OUTPUT_TYPE.accepts(i.getName())) {
                outputIntervals.add(i);
            }
        }

        log.info("Writing Intervals.");
        outputIntervals.write(OUTPUT);

        log.info(String.format("Execution ending. Total time %d seconds", locusProgress.getElapsedSeconds()));

        return 0;
    }
View Full Code Here


    public static IntervalList segregateReference(final ReferenceSequenceFile refFile, final int maxNmerToMerge) {
        final List<Interval> preliminaryIntervals = new LinkedList<Interval>();
        final SAMFileHeader header = new SAMFileHeader();
        header.setSequenceDictionary(refFile.getSequenceDictionary());
        header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
        final IntervalList finalIntervals = new IntervalList(header);

        //iterate over all the sequences in the dictionary
        for (final SAMSequenceRecord rec : refFile.getSequenceDictionary().getSequences()) {
            final ReferenceSequence ref = refFile.getSequence(rec.getSequenceName());
            final byte[] bytes = ref.getBases();
            StringUtil.toUpperCase(bytes);

            boolean nBlockIsOpen = (bytes[0] == 'N');
            int start = 0;

            for (int i = 0; i < bytes.length; ++i) {
                final boolean currentBaseIsN = (bytes[i] == 'N');

                //create intervals when switching, i.e "nBlockIsOpen" disagrees with "currentBaseIsN"
                if (nBlockIsOpen != currentBaseIsN) {
                    preliminaryIntervals.add(new Interval(rec.getSequenceName(), start + 1, i, false, nBlockIsOpen ? Nmer : ACGTmer));
                    start = i;
                    nBlockIsOpen = !nBlockIsOpen;
                }
            }
            // Catch the last block of chromosome
            preliminaryIntervals.add(new Interval(rec.getSequenceName(), start + 1, bytes.length, false, nBlockIsOpen ? Nmer : ACGTmer));
        }

        // now that we have the whole list, we need to remove the short Nmers.
        // process the list, replacing trios with short Nmers in the middle with longer intervals:
        while (!preliminaryIntervals.isEmpty()) {

            //if top trio match the bill, replace them with a merged interval,
            // and push it back the top of the list (we expect alternating Nmers and ACGTmers, but
            // not assuming it in the logic)

            //(I want this to be fast and the strings are all copies of the static prototypes Nmer and ACGTmer )
            //noinspection StringEquality
            if (preliminaryIntervals.size() >= 3 && // three or more intervals
                    preliminaryIntervals.get(0).getName() == ACGTmer &&   //an N-mer
                    preliminaryIntervals.get(1).getName() == Nmer &&      //between two
                    preliminaryIntervals.get(2).getName() == ACGTmer &&   //ACGT-mers
                    preliminaryIntervals.get(0).abuts(preliminaryIntervals.get(1)) && // all abutting
                    preliminaryIntervals.get(1).abuts(preliminaryIntervals.get(2)) && // each other (there are many contigs...)
                    preliminaryIntervals.get(1).length() <= maxNmerToMerge) //and the N-mer is of length N or less
            {
                // create the new ACGTmer interval
                final Interval temp = new Interval(
                        preliminaryIntervals.get(0).getSequence(),
                        preliminaryIntervals.get(0).getStart(),
                        preliminaryIntervals.get(2).getEnd(), false, ACGTmer);

                //remove the first 3 elements of the list
                for (int i = 0; i < 3; ++i) {
                    preliminaryIntervals.remove(0);
                }
                //and replace them with the newly created one
                preliminaryIntervals.add(0, temp);
            } else { //if cannot merge top three intervals, transfer the top intervals to finalIntervals
                finalIntervals.add(preliminaryIntervals.remove(0));
            }
        }
        return finalIntervals;
    }
View Full Code Here

            OUTPUT_DIRECTORY.mkdirs();
        }
        IOUtil.assertDirectoryIsWritable(OUTPUT_DIRECTORY);

        // Load up the targets and the reference
        final IntervalList targets;
        final IntervalList originalTargets =  IntervalList.fromFile(TARGETS);

        {
            // Apply padding
            final IntervalList padded = new IntervalList(originalTargets.getHeader());
            final SAMSequenceDictionary dict = padded.getHeader().getSequenceDictionary();
            for (final Interval i : originalTargets.getIntervals()) {
                padded.add(new Interval(i.getSequence(),
                        Math.max(i.getStart() - PADDING, 1),
                        Math.min(i.getEnd() + PADDING, dict.getSequence(i.getSequence()).getSequenceLength()),
                        i.isNegativeStrand(),
                        i.getName()));
            }

            log.info("Starting with " + padded.size() + " targets.");
            padded.unique();
            log.info("After uniquing " + padded.size() + " targets remain.");

            if (MERGE_NEARBY_TARGETS) {
                final ListIterator<Interval> iterator = padded.getIntervals().listIterator();
                Interval previous = iterator.next();

                targets = new IntervalList(padded.getHeader());

                while (iterator.hasNext()) {
                    final Interval next = iterator.next();
                    if (previous.getSequence().equals(next.getSequence()) &&
                            estimateBaits(previous.getStart(), previous.getEnd()) + estimateBaits(next.getStart(), next.getEnd()) >=
                                    estimateBaits(previous.getStart(), next.getEnd())) {
                        previous = new Interval(previous.getSequence(),
                                previous.getStart(),
                                Math.max(previous.getEnd(), next.getEnd()),
                                previous.isNegativeStrand(),
                                previous.getName());
                    }
                    else {
                        targets.add(previous);
                        previous = next;
                    }
                }

                if (previous != null) targets.add(previous);
                log.info("After collapsing nearby targets " + targets.size() + " targets remain.");
            }
            else {
                targets = padded;
            }
        }

        final ReferenceSequenceFileWalker referenceWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);

        // Check that the reference and the target list have matching headers
        SequenceUtil.assertSequenceDictionariesEqual(referenceWalker.getSequenceDictionary(),
                targets.getHeader().getSequenceDictionary());

        // Design the baits!
        int discardedBaits = 0;
        final IntervalList baits = new IntervalList(targets.getHeader());
        for (final Interval target : targets) {
            final int sequenceIndex = targets.getHeader().getSequenceIndex(target.getSequence());
            final ReferenceSequence reference = referenceWalker.get(sequenceIndex);

            for (final Bait bait : DESIGN_STRATEGY.design(this, target, reference)) {
                if (bait.length() != BAIT_SIZE) {
                    throw new PicardException("Bait designed at wrong length: " + bait);
                }

                if (bait.getMaskedBaseCount() <= REPEAT_TOLERANCE) {
                    baits.add(bait);

                    for (final byte b : bait.getBases()) {
                        final byte upper = StringUtil.toUpperCase(b);
                        if (upper != 'A' && upper != 'C' && upper != 'G' && upper != 'T') {
                            log.warn("Bait contains non-synthesizable bases: " + bait);
                        }
                    }
                }
                else {
                    log.debug("Discarding bait: " + bait);
                    discardedBaits++;
                }
            }
        }

        calculateStatistics(targets, baits);
        log.info("Designed and kept " + baits.size() + " baits, discarded " + discardedBaits);

        // Write out some files!
        originalTargets.write(new File(OUTPUT_DIRECTORY, DESIGN_NAME + ".targets.interval_list"));
        baits.write(new File(OUTPUT_DIRECTORY, DESIGN_NAME + ".baits.interval_list"));
        writeParametersFile(new File(OUTPUT_DIRECTORY, DESIGN_NAME + ".design_parameters.txt"));
        writeDesignFastaFile(new File(OUTPUT_DIRECTORY, DESIGN_NAME + ".design.fasta"), baits);
        if (POOL_SIZE > 0) writePoolFiles(OUTPUT_DIRECTORY, DESIGN_NAME, baits);

        return 0;
View Full Code Here

        this.BAIT_TERRITORY   = (int) baits.getUniqueBaseCount();
        this.BAIT_COUNT       = baits.size();
        this.DESIGN_EFFICIENCY = this.TARGET_TERRITORY / (double) this.BAIT_TERRITORY;

        // Figure out the intersection between all targets and all baits
        final IntervalList tmp = new IntervalList(targets.getHeader());
        final OverlapDetector<Interval> detector = new OverlapDetector<Interval>(0,0);
        detector.addAll(baits.getIntervals(), baits.getIntervals());

        for (final Interval target : targets) {
            final Collection<Interval> overlaps = detector.getOverlaps(target);

            if (overlaps.isEmpty()) {
                this.ZERO_BAIT_TARGETS++;
            }
            else {
                for (final Interval i : overlaps) tmp.add(target.intersect(i));
            }
        }

        tmp.unique();
        this.BAIT_TARGET_TERRITORY_INTERSECTION = (int) tmp.getBaseCount();
    }
View Full Code Here

        IOUtil.assertFileIsWritable(OUTPUT);

        final LiftOver liftOver = new LiftOver(CHAIN);
        liftOver.setLiftOverMinMatch(MIN_LIFTOVER_PCT);

        final IntervalList fromIntervals = IntervalList.fromFile(INPUT);
        final SAMFileHeader toHeader = new SAMFileReader(SEQUENCE_DICTIONARY).getFileHeader();
        liftOver.validateToSequences(toHeader.getSequenceDictionary());
        final IntervalList toIntervals = new IntervalList(toHeader);
        boolean anyFailed = false;
        for (final Interval fromInterval : fromIntervals) {
            final Interval toInterval = liftOver.liftOver(fromInterval);
            if (toInterval != null) {
                toIntervals.add(toInterval);
            } else {
                anyFailed = true;
                LOG.warn("Liftover failed for ", fromInterval, "(len ", fromInterval.length(), ")");
                final List<LiftOver.PartialLiftover> partials = liftOver.diagnosticLiftover(fromInterval);
                for (final LiftOver.PartialLiftover partial : partials) {
                    LOG.info(partial);
                }
            }
        }

        toIntervals.sort();
        toIntervals.write(OUTPUT);
        return anyFailed? 1: 0;
    }
View Full Code Here

    @Override
    protected int doWork() {
        IOUtil.assertFileIsReadable(INPUT);
        IOUtil.assertFileIsWritable(OUTPUT);

        final IntervalList intervalList = VCFFileReader.fromVcf(INPUT);
        // Sort and write the output
        intervalList.uniqued().write(OUTPUT);
        return 0;
    }
View Full Code Here

        IOUtil.assertFileIsWritable(summaryMetricsFile);
        IOUtil.assertFileIsWritable(detailedMetricsFile);
        IOUtil.assertFileIsWritable(contingencyMetricsFile);

        final boolean usingIntervals = this.INTERVALS != null && this.INTERVALS.size() > 0;
        IntervalList intervals = null;
        SAMSequenceDictionary intervalsSamSequenceDictionary = null;
        if (usingIntervals) {
            log.info("Loading up region lists.");
            long genomeBaseCount = 0;
            for (final File f : INTERVALS) {
                IOUtil.assertFileIsReadable(f);
                final IntervalList tmpIntervalList = IntervalList.fromFile(f);
                if (genomeBaseCount == 0) {         // Don't count the reference length more than once.
                    intervalsSamSequenceDictionary = tmpIntervalList.getHeader().getSequenceDictionary();
                    genomeBaseCount = intervalsSamSequenceDictionary.getReferenceLength();
                }

                if (intervals == null)        intervals = tmpIntervalList;
                else if (INTERSECT_INTERVALS) intervals = IntervalList.intersection(intervals, tmpIntervalList);
View Full Code Here

    public static OverlapDetector<Interval> makeOverlapDetector(final File samFile, final SAMFileHeader header, final File ribosomalIntervalsFile) {

        OverlapDetector<Interval> ribosomalSequenceOverlapDetector = new OverlapDetector<Interval>(0, 0);
        if (ribosomalIntervalsFile != null) {

            final IntervalList ribosomalIntervals = IntervalList.fromFile(ribosomalIntervalsFile);
            try {
                SequenceUtil.assertSequenceDictionariesEqual(header.getSequenceDictionary(), ribosomalIntervals.getHeader().getSequenceDictionary());
            } catch (SequenceUtil.SequenceListsDifferException e) {
                throw new PicardException("Sequence dictionaries differ in " + samFile.getAbsolutePath() + " and " + ribosomalIntervalsFile.getAbsolutePath(),
                        e);
            }
            ribosomalIntervals.unique();
            final List<Interval> intervals = ribosomalIntervals.getIntervals();
            ribosomalSequenceOverlapDetector.addAll(intervals, intervals);
        }
        return ribosomalSequenceOverlapDetector;
    }
View Full Code Here

        IOUtil.assertFileIsReadable(INPUT);
        IOUtil.assertFileIsWritable(OUTPUT);
        if (PER_TARGET_COVERAGE != null) IOUtil.assertFileIsWritable(PER_TARGET_COVERAGE);

        final SamReader reader = SamReaderFactory.makeDefault().open(INPUT);
        final IntervalList targetIntervals = IntervalList.fromFiles(TARGET_INTERVALS);

        // Validate that the targets and baits have the same references as the reads file
        SequenceUtil.assertSequenceDictionariesEqual(
                reader.getFileHeader().getSequenceDictionary(),
                targetIntervals.getHeader().getSequenceDictionary());
        SequenceUtil.assertSequenceDictionariesEqual(
                reader.getFileHeader().getSequenceDictionary(),
                getProbeIntervals().getHeader().getSequenceDictionary()
        );
View Full Code Here

        else dbSnp = null;

        // Make an iterator that will filter out funny looking things
        final SamLocusIterator iterator;
        if (INTERVALS != null) {
            final IntervalList intervals = IntervalList.fromFile(INTERVALS);
            intervals.unique();
            iterator = new SamLocusIterator(in, intervals, false);
        }
        else {
            iterator = new SamLocusIterator(in);
        }
View Full Code Here

TOP

Related Classes of htsjdk.samtools.util.IntervalList

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.