Package htsjdk.samtools.util

Examples of htsjdk.samtools.util.IntervalList


    public void testInvalidPicardIntervalHandling(GATKArgumentCollection argCollection, GenomeLocParser genomeLocParser,
                                                  String contig, int intervalStart, int intervalEnd ) throws Exception {

        SAMFileHeader picardFileHeader = new SAMFileHeader();
        picardFileHeader.addSequence(genomeLocParser.getContigInfo("chr1"));
        IntervalList picardIntervals = new IntervalList(picardFileHeader);
        picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname"));

        File picardIntervalFile = createTempFile("testInvalidPicardIntervalHandling", ".intervals");
        picardIntervals.write(picardIntervalFile);

        List<IntervalBinding<Feature>> intervalArgs = new ArrayList<IntervalBinding<Feature>>(1);
        intervalArgs.add(new IntervalBinding<Feature>(picardIntervalFile.getAbsolutePath()));

        IntervalUtils.loadIntervals(intervalArgs, argCollection.intervalArguments.intervalSetRule, argCollection.intervalArguments.intervalMerging, argCollection.intervalArguments.intervalPadding, genomeLocParser);
View Full Code Here


             * we'll fail quickly if it's not a valid file.
             */
            boolean isPicardInterval = false;
            try {
                // Note: Picard will skip over intervals with contigs not in the sequence dictionary
                IntervalList il = IntervalList.fromFile(inputFile);
                isPicardInterval = true;

                int nInvalidIntervals = 0;
                for (Interval interval : il.getIntervals()) {
                    if ( glParser.isValidGenomeLoc(interval.getSequence(), interval.getStart(), interval.getEnd(), true))
                        ret.add(glParser.createGenomeLoc(interval.getSequence(), interval.getStart(), interval.getEnd(), true));
                    else {
                        nInvalidIntervals++;
                    }
View Full Code Here

  if(contigStartLocs.size() < scatterParts.size())
      throw new UserException.BadInput(String.format("Input genome region has too few contigs (%d) to split into %d parts", contigStartLocs.size(), scatterParts.size()));

  long thisPartBases = 0;
  int partIdx = 0;
  IntervalList outList = new IntervalList(fileHeader);

  for(int i = 0; i < locs.size(); ++i) {

      GenomeLoc loc = locs.get(i);
      thisPartBases += loc.getStop() - loc.getStart();

      outList.add(toInterval(loc, i));

      boolean partMustStop = false;

      if(partIdx < (scatterParts.size() - 1)) {

    // If there are n contigs and n parts remaining then we must split here,
    // otherwise we will run out of contigs.

    int nextPart = partIdx + 1;
    int nextPartMustStartBy = contigStartLocs.get(nextPart + (contigStartLocs.size() - scatterParts.size()));
    if(i + 1 == nextPartMustStartBy)
        partMustStop = true;
   
      }
      else if(i == locs.size() - 1) {

    // We're done! Write the last scatter file.
    partMustStop = true;

      }
     
      if(partMustStop || thisPartBases > idealBasesPerPart) {

    // Ideally we would split here. However, we must make sure to do so
    // on a contig boundary. Test always passes with partMustStop == true
    // since that indicates we're at a contig boundary.

    GenomeLoc nextLoc = null;
    if((i + 1) < locs.size())
        nextLoc = locs.get(i+1);

    if(nextLoc == null || !nextLoc.getContig().equals(loc.getContig())) {

        // Write out this part:
        outList.write(scatterParts.get(partIdx));

        // Reset. If this part ran long, leave the excess in thisPartBases
        // and the next will be a little shorter to compensate.
        outList = new IntervalList(fileHeader);
        thisPartBases -= idealBasesPerPart;
        ++partIdx;
       
    }
View Full Code Here

            throw new UserException.BadArgumentValue("splits", String.format("Split points %d does not equal the number of scatter parts %d.", splits.size(), scatterParts.size()));

        int fileIndex = 0;
        int locIndex = 1;
        for (final List<GenomeLoc> split : splits) {
            IntervalList intervalList = new IntervalList(fileHeader);
            for (final GenomeLoc loc : split)
                intervalList.add(toInterval(loc, locIndex++));
            intervalList.write(scatterParts.get(fileIndex++));
        }
    }
View Full Code Here

        if (flankingList.isEmpty())
            throw new UserException.MalformedFile(inputIntervals, "Unable to produce any flanks for the intervals");

        SAMFileHeader samFileHeader = new SAMFileHeader();
        samFileHeader.setSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary());
        IntervalList intervalList = new IntervalList(samFileHeader);
        int i = 0;
        for (GenomeLoc loc: flankingList)
            intervalList.add(toInterval(loc, ++i));
        intervalList.write(flankingIntervals);
    }
View Full Code Here

    }

    public List<IntervalList> scatter(final IntervalList sourceIntervalList, final int scatterCount) {
        if (scatterCount < 1) throw new IllegalArgumentException("scatterCount < 1");

        final IntervalList uniquedList = sourceIntervalList.uniqued();
        final long idealSplitLength = deduceIdealSplitLength(uniquedList, scatterCount);

        final List<IntervalList> accumulatedIntervalLists = new ArrayList<IntervalList>();

        IntervalList runningIntervalList = new IntervalList(uniquedList.getHeader());
        final ArrayDeque<Interval> intervalQueue = new ArrayDeque<Interval>(uniquedList.getIntervals());

        while (!intervalQueue.isEmpty() && accumulatedIntervalLists.size() < scatterCount - 1) {
            final Interval interval = intervalQueue.pollFirst();
            final long projectedSize = runningIntervalList.getBaseCount() + interval.length();
            if (projectedSize <= idealSplitLength) {
                runningIntervalList.add(interval);
            } else {
                final Interval intervalToAdd;
                switch (mode) {
                    case INTERVAL_SUBDIVISION:
                        final int amountToConsume = (int) (idealSplitLength - runningIntervalList.getBaseCount());
                        final Interval left = new Interval(
                                interval.getSequence(),
                                interval.getStart(),
                                interval.getStart() + amountToConsume - 1,
                                interval.isNegativeStrand(),
                                interval.getName()
                        );
                        final Interval right = new Interval(
                                interval.getSequence(),
                                interval.getStart() + amountToConsume,
                                interval.getEnd(),
                                interval.isNegativeStrand(),
                                interval.getName()
                        );
                        runningIntervalList.add(left);

                        // Push back the excess back onto our queue for reconsideration.
                        intervalQueue.addFirst(right);
                        break;

                    case BALANCING_WITHOUT_INTERVAL_SUBDIVISION:
                        if (runningIntervalList.getIntervals().isEmpty()) {
                            runningIntervalList.add(interval);
                        } else {
                            // Push this interval into the next scatter; re-inject it into the queue, then advance the scatter.
                            intervalQueue.addFirst(interval);
                            accumulatedIntervalLists.add(runningIntervalList);
                            runningIntervalList = new IntervalList(uniquedList.getHeader());
                        }
                        break;
                }
            }

            if (runningIntervalList.getBaseCount() >= idealSplitLength) {
                accumulatedIntervalLists.add(runningIntervalList);
                runningIntervalList = new IntervalList(uniquedList.getHeader());
            }
        }

        // Flush the remaining intervals into the last split.
        while (!intervalQueue.isEmpty()) {
            runningIntervalList.add(intervalQueue.pollFirst());
        }
        if (!runningIntervalList.getIntervals().isEmpty()) {
            accumulatedIntervalLists.add(runningIntervalList);
        }

        return accumulatedIntervalLists;
    }
View Full Code Here

        IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY);
        IOUtil.assertFileIsWritable(OUTPUT);
        try {
            final SamReader samReader = SamReaderFactory.makeDefault().open(SEQUENCE_DICTIONARY);
            final SAMFileHeader header = samReader.getFileHeader();
            final IntervalList intervalList = new IntervalList(header);
            CloserUtil.close(samReader);

            /**
             * NB: BED is zero-based, but a BEDCodec by default (since it is returns tribble Features) has an offset of one,
             * so it returns 1-based starts.  Ugh.  Set to zero.
             */
            final FeatureReader<BEDFeature> bedReader = AbstractFeatureReader.getFeatureReader(INPUT.getAbsolutePath(), new BEDCodec(BEDCodec.StartOffset.ZERO), false);
            final CloseableTribbleIterator<BEDFeature> iterator = bedReader.iterator();
            final ProgressLogger progressLogger = new ProgressLogger(LOG, (int) 1e6);

            while (iterator.hasNext()) {
                final BEDFeature bedFeature = iterator.next();
                final String sequenceName = bedFeature.getChr();
                /**
                 * NB: BED is zero-based, so we need to add one here to make it one-based.  Please observe we set the start
                 * offset to zero when creating the BEDCodec.
                 */
                final int start = bedFeature.getStart() + 1;
                /**
                 * NB: BED is 0-based OPEN (which, for the end is equivalent to 1-based closed).
                 */
                final int end = bedFeature.getEnd();
                // NB: do not use an empty name within an interval
                String name = bedFeature.getName();
                if (name.isEmpty()) name = null;

                final SAMSequenceRecord sequenceRecord = header.getSequenceDictionary().getSequence(sequenceName);

                // Do some validation
                if (null == sequenceRecord) {
                    throw new PicardException(String.format("Sequence '%s' was not found in the sequence dictionary", sequenceName));
                }
                else if (start < 1) {
                    throw new PicardException(String.format("Start on sequence '%s' was less than one: %d", sequenceName, start));
                }
                else if (sequenceRecord.getSequenceLength() < start) {
                    throw new PicardException(String.format("Start on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), start));
                }
                else if (end < 1) {
                    throw new PicardException(String.format("End on sequence '%s' was less than one: %d", sequenceName, end));
                }
                else if (sequenceRecord.getSequenceLength() < end) {
                    throw new PicardException(String.format("End on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), end));
                }
                else if (end < start - 1) {
                    throw new PicardException(String.format("On sequence '%s', end < start-1: %d <= %d", sequenceName, end, start));
                }

                final Interval interval = new Interval(sequenceName, start, end, bedFeature.getStrand() == Strand.POSITIVE, name);
                intervalList.add(interval);

                progressLogger.record(sequenceName, start);
            }
            CloserUtil.close(bedReader);

            // Sort and write the output
            intervalList.uniqued().write(OUTPUT);

        } catch (final IOException e) {
            throw new RuntimeException(e);
        }

View Full Code Here

        }

        // Read in the interval lists and apply any padding
        final List<IntervalList> lists = new ArrayList<IntervalList>();
        for (final File f : INPUT) {
            final IntervalList list = IntervalList.fromFile(f);
            if (PADDING != 0) {
                final IntervalList out = new IntervalList(list.getHeader());
                for (final Interval i : list) {
                    final int start = i.getStart() - PADDING;
                    final int end = i.getEnd() + PADDING;
                    if (start <= end) {
                        final Interval i2 = new Interval(i.getSequence(), start, end, i.isNegativeStrand(), i.getName());
                        out.add(i2);
                    }
                }

                lists.add(out);
            } else {
                lists.add(list);
            }
        }

        // same for the second list
        final List<IntervalList> secondLists = new ArrayList<IntervalList>();
        for (final File f : SECOND_INPUT) {
            final IntervalList list = IntervalList.fromFile(f);
            if (PADDING != 0) {
                final IntervalList out = new IntervalList(list.getHeader());
                for (final Interval i : list) {
                    final int start = i.getStart() - PADDING;
                    final int end = i.getEnd() + PADDING;
                    if (start <= end) {
                        final Interval i2 = new Interval(i.getSequence(), start, end, i.isNegativeStrand(), i.getName());
                        out.add(i2);
                    }
                }

                secondLists.add(out);
            } else {
                secondLists.add(list);
            }
        }

        if (UNIQUE && !SORT) {
            LOG.warn("UNIQUE=true requires sorting but SORT=false was specified.  Results will be sorted!");
        }

        final IntervalList result = ACTION.act(lists, secondLists);

        if (INVERT) {
            SORT = false; // no need to sort, since return will be sorted by definition.
            UNIQUE = false; //no need to unique since invert will already return a unique list.
        }

        final IntervalList possiblySortedResult = SORT ? result.sorted() : result;
        final IntervalList possiblyInvertedResult = INVERT ? IntervalList.invert(possiblySortedResult) : possiblySortedResult;

        //only get unique if this has been asked unless inverting (since the invert will return a unique list)
        final List<Interval> finalIntervals = UNIQUE ? possiblyInvertedResult.uniqued().getIntervals() : possiblyInvertedResult.getIntervals();

        // Decide on a PG ID and make a program group
        final SAMFileHeader header = result.getHeader();
        final Set<String> pgs = new HashSet<String>();
        for (final SAMProgramRecord pg : header.getProgramRecords()) pgs.add(pg.getId());
        for (int i = 1; i < Integer.MAX_VALUE; ++i) {
            if (!pgs.contains(String.valueOf(i))) {
                final SAMProgramRecord pg = new SAMProgramRecord(String.valueOf(i));
                pg.setCommandLine(getCommandLine());
                pg.setProgramName(getClass().getSimpleName());
                header.addProgramRecord(pg);
                break;
            }
        }

        // Add any comments
        if (COMMENT != null) {
            for (final String comment : COMMENT) {
                header.addComment(comment);
            }
        }

        final IntervalList output = new IntervalList(header);
        for (final Interval i : finalIntervals) {
            output.add(i);
        }

        final List<IntervalList> resultIntervals;
        if (OUTPUT != null) {
            if (SCATTER_COUNT == 1) {
                output.write(OUTPUT);
                resultIntervals = Arrays.asList(output);
            } else {
                final List<IntervalList> scattered = writeScatterIntervals(output);
                LOG.info(String.format("Wrote %s scatter subdirectories to %s.", scattered.size(), OUTPUT));
                if (scattered.size() != SCATTER_COUNT) {
View Full Code Here

    protected int doWork() {
        IOUtil.assertFileIsReadable(INTERVAL_LIST);
        IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
        IOUtil.assertFileIsWritable(OUTPUT);

        final IntervalList intervals = IntervalList.fromFile(INTERVAL_LIST);
        final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
        SequenceUtil.assertSequenceDictionariesEqual(intervals.getHeader().getSequenceDictionary(), ref.getSequenceDictionary());

        final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT);

        for (final Interval interval : intervals) {
            final ReferenceSequence seq = ref.getSubsequenceAt(interval.getSequence(), interval.getStart(), interval.getEnd());
View Full Code Here

        if (TARGET_REGION == null) {
            metrics.TARGET_TERRITORY = header.getSequenceDictionary().getReferenceLength();
        } else {
            IOUtil.assertFileIsReadable(TARGET_REGION);
            IOUtil.assertFileIsWritable(OUTPUT);
            final IntervalList targetInterval = IntervalList.fromFile(TARGET_REGION);
            metrics.TARGET_TERRITORY = targetInterval.getUniqueBaseCount();
        }
    }
View Full Code Here

TOP

Related Classes of htsjdk.samtools.util.IntervalList

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.