Package picard.util

Source Code of picard.util.IntervalListScatterer

package picard.util;

import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.IntervalList;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

/**
* @author mccowan
*/
public class IntervalListScatterer {

    public enum Mode {
        /**
         * A simple scatter approach in which all output intervals have size equal to the total base count of the source list divide by the
         * scatter count (except for possible variance in the final interval list).
         */
        INTERVAL_SUBDIVISION,
        /**
         * A scatter approach that differs from {@link Mode#INTERVAL_SUBDIVISION} in a few ways.
         * <ol>
         * <li>No interval will be subdivided, and consequently, the requested scatter count is an upper bound of scatter count, not a
         * guarantee as to how many {@link IntervalList}s will be produced (e.g., if scatterCount = 10 but there is only one input interval,
         * only 1 interval list will be emitted).</li>
         * <li>When an interval would otherwise be split, it is instead deferred to the next scatter list.</li>
         * <li>The "target width" of each scatter list may be wider than what is computed for {@link Mode#INTERVAL_SUBDIVISION}.
         * Specifically, if the widest interval in the source interval list is larger than what would otherwise be the target width, that
         * interval's width is used.<br/><br/>The reasoning for this is that this approach produces more consistently-sized interval lists,
         * which is one of the objectives of scattering.</li>
         * </ol>
         */
        BALANCING_WITHOUT_INTERVAL_SUBDIVISION
    }

    private final Mode mode;

    public IntervalListScatterer(final Mode mode) {this.mode = mode;}

    private int deduceIdealSplitLength(final IntervalList uniquedList, final int scatterCount) {
        final int splitWidth = Math.max((int) Math.floor(uniquedList.getBaseCount() / (1.0 * scatterCount)), 1);
        switch (mode) {
            case INTERVAL_SUBDIVISION:
                return splitWidth;
            case BALANCING_WITHOUT_INTERVAL_SUBDIVISION:
                final int widestIntervalLength = Collections.max(uniquedList.getIntervals(), new Comparator<Interval>() {
                    @Override
                    public int compare(final Interval o1, final Interval o2) {
                        return Integer.valueOf(o1.length()).compareTo(o2.length());
                    }
                }).length();

                // There is no purpose to splitting more granularly than the widest interval, so do not.
                return Math.max(widestIntervalLength, splitWidth);
            default:
                throw new IllegalStateException();
        }
    }

    public List<IntervalList> scatter(final IntervalList sourceIntervalList, final int scatterCount) {
        if (scatterCount < 1) throw new IllegalArgumentException("scatterCount < 1");

        final IntervalList uniquedList = sourceIntervalList.uniqued();
        final long idealSplitLength = deduceIdealSplitLength(uniquedList, scatterCount);

        final List<IntervalList> accumulatedIntervalLists = new ArrayList<IntervalList>();

        IntervalList runningIntervalList = new IntervalList(uniquedList.getHeader());
        final ArrayDeque<Interval> intervalQueue = new ArrayDeque<Interval>(uniquedList.getIntervals());

        while (!intervalQueue.isEmpty() && accumulatedIntervalLists.size() < scatterCount - 1) {
            final Interval interval = intervalQueue.pollFirst();
            final long projectedSize = runningIntervalList.getBaseCount() + interval.length();
            if (projectedSize <= idealSplitLength) {
                runningIntervalList.add(interval);
            } else {
                final Interval intervalToAdd;
                switch (mode) {
                    case INTERVAL_SUBDIVISION:
                        final int amountToConsume = (int) (idealSplitLength - runningIntervalList.getBaseCount());
                        final Interval left = new Interval(
                                interval.getSequence(),
                                interval.getStart(),
                                interval.getStart() + amountToConsume - 1,
                                interval.isNegativeStrand(),
                                interval.getName()
                        );
                        final Interval right = new Interval(
                                interval.getSequence(),
                                interval.getStart() + amountToConsume,
                                interval.getEnd(),
                                interval.isNegativeStrand(),
                                interval.getName()
                        );
                        runningIntervalList.add(left);

                        // Push back the excess back onto our queue for reconsideration.
                        intervalQueue.addFirst(right);
                        break;

                    case BALANCING_WITHOUT_INTERVAL_SUBDIVISION:
                        if (runningIntervalList.getIntervals().isEmpty()) {
                            runningIntervalList.add(interval);
                        } else {
                            // Push this interval into the next scatter; re-inject it into the queue, then advance the scatter.
                            intervalQueue.addFirst(interval);
                            accumulatedIntervalLists.add(runningIntervalList);
                            runningIntervalList = new IntervalList(uniquedList.getHeader());
                        }
                        break;
                }
            }

            if (runningIntervalList.getBaseCount() >= idealSplitLength) {
                accumulatedIntervalLists.add(runningIntervalList);
                runningIntervalList = new IntervalList(uniquedList.getHeader());
            }
        }

        // Flush the remaining intervals into the last split.
        while (!intervalQueue.isEmpty()) {
            runningIntervalList.add(intervalQueue.pollFirst());
        }
        if (!runningIntervalList.getIntervals().isEmpty()) {
            accumulatedIntervalLists.add(runningIntervalList);
        }

        return accumulatedIntervalLists;
    }
}
TOP

Related Classes of picard.util.IntervalListScatterer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.