Package com.googlecode.gaal.suffix.impl

Source Code of com.googlecode.gaal.suffix.impl.EmbeddedSuffixTreeImpl$EmbeddedIntervalImpl

package com.googlecode.gaal.suffix.impl;

import java.util.Iterator;
import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.SortedMap;
import java.util.TreeMap;

import com.googlecode.gaal.data.api.IntSequence;
import com.googlecode.gaal.data.api.Multiset;
import com.googlecode.gaal.data.api.SymbolTable;
import com.googlecode.gaal.data.impl.ArraySequence;
import com.googlecode.gaal.data.impl.TreeMultiset;
import com.googlecode.gaal.suffix.algorithm.impl.ExtendedLcpTableBuilderImpl;
import com.googlecode.gaal.suffix.algorithm.impl.KimChildTableBuilder;
import com.googlecode.gaal.suffix.algorithm.impl.NaiveLcpTableBuilder;
import com.googlecode.gaal.suffix.api.EmbeddedSuffixTree;
import com.googlecode.gaal.suffix.api.EmbeddedSuffixTree.EmbeddedInterval;
import com.googlecode.gaal.suffix.api.EnhancedSuffixArray;
import com.googlecode.gaal.suffix.api.SuffixArray;

public class EmbeddedSuffixTreeImpl extends AbstractBinaryIntervalTree<EmbeddedInterval> implements EmbeddedSuffixTree {

    private final EmbeddedInterval top;
    private final int[] embeddingSuffixTable;
    private final Interval interval;

    private EmbeddedSuffixTreeImpl(Interval interval, IntSequence sequence, int alphabetSize, int[] suffixTable,
            int[] inverseSuffixTable, int[] embeddingSuffixTable, int[] lcpTable, int[] extendedLcpTable,
            int[] childTable) {
        super(sequence, alphabetSize, suffixTable, lcpTable, extendedLcpTable, childTable);
        top = new EmbeddedIntervalImpl(0, suffixTable.length - 1);
        this.interval = interval;
        this.embeddingSuffixTable = embeddingSuffixTable;
        this.inverseSuffixTable = inverseSuffixTable;
    }

    @Override
    public EmbeddedInterval top() {
        return top;
    }

    protected class EmbeddedIntervalImpl extends AbstractBinaryInterval implements EmbeddedInterval {
        private Multiset<IntSequence> fillerSet;

        protected EmbeddedIntervalImpl(int left, int right) {
            super(left, right);
        }

        @Override
        public EmbeddedInterval leftChild() {
            return new EmbeddedIntervalImpl(left, middle(left, right) - 1);
        }

        @Override
        public EmbeddedInterval rightChild() {
            return new EmbeddedIntervalImpl(middle(left, right), right);
        }

        @Override
        public Iterator<IntSequence> fillerIterator() {
            return new Iterator<IntSequence>() {
                private int index = left;
                private int lcp = interval.lcp();

                @Override
                public boolean hasNext() {
                    return index <= right;
                }

                @Override
                public IntSequence next() {
                    if (index > right)
                        throw new NoSuchElementException();
                    else {
                        int end = suffixTable[index];
                        return sequence.subSequence(embeddingSuffixTable[index++] + lcp, end);
                    }
                }

                @Override
                public void remove() {
                    throw new UnsupportedOperationException();
                }

            };

        }

        @Override
        public Multiset<IntSequence> fillerSet() {
            if (fillerSet == null) {
                fillerSet = new TreeMultiset<IntSequence>(fillerIterator());
            }
            return fillerSet;
        }

        @Override
        public Interval getEmbeddingInterval() {
            return interval;
        }

        @Override
        public IntSequence embeddingIndices() {
            return new ArraySequence(embeddingSuffixTable, left, right + 1);
        }
    }

    /**
     * a factory method to create an embedded suffix tree
     *
     * @param <S>
     *            - alphabet symbol type
     * @param sa
     *            - the top level suffix array
     * @param interval
     *            - the embedding interval
     * @param windowSize
     *            - the window size
     * @param symbolTable
     *            - the symbol table
     * @return the embedded suffix tree for the given interval, null if such
     *         tree can not be constructed
     */
    public static <S> EmbeddedSuffixTree create(SuffixArray sa, Interval interval, int windowSize,
            SymbolTable<S> symbolTable) {
        IntSequence sequence = sa.getSequence();
        int[] suffixTable = sa.getSuffixTable();
        int[] inverseSuffixTable = sa.getInverseSuffixTable();
        SortedMap<Integer, Integer> embeddedSuffixTableIndices = new TreeMap<Integer, Integer>();
        int lcp = interval.lcp();
        IntSequence indices = interval.indices();
        for (int i = 0; i < interval.size(); i++) {
            int start = indices.get(i) + lcp;
            for (int j = start; j < start + windowSize && j < inverseSuffixTable.length
                    && !symbolTable.isSeparator(sequence.get(j)); j++) {
                Integer startIndex = embeddedSuffixTableIndices.get(inverseSuffixTable[j]);
                if (startIndex == null || startIndex < start) {
                    embeddedSuffixTableIndices.put(inverseSuffixTable[j], start);
                }
            }
        }
        int[] embeddedSuffixTable = new int[embeddedSuffixTableIndices.size()];
        int[] embeddingSuffixTable = new int[embeddedSuffixTableIndices.size()];
        int index = 0;
        for (Entry<Integer, Integer> entry : embeddedSuffixTableIndices.entrySet()) {
            embeddedSuffixTable[index] = suffixTable[entry.getKey()];
            embeddingSuffixTable[index] = entry.getValue() - lcp;
            index++;
        }
        if (embeddedSuffixTable.length > 0) {
            int alphabetSize = symbolTable.alphabetSize();
            int[] lcpTable = new NaiveLcpTableBuilder().buildLcpTable(embeddedSuffixTable, sequence);
            int[] childTable = new KimChildTableBuilder().buildChildTable(lcpTable);
            EnhancedSuffixArray esa = new EnhancedSuffixArrayBase(sequence, alphabetSize, embeddedSuffixTable,
                    lcpTable, childTable);
            int[] extendedLcpTable = new ExtendedLcpTableBuilderImpl().buildExtendedLcpTable(esa);
            childTable = new KimChildTableBuilder().buildChildTable(extendedLcpTable);

            return new EmbeddedSuffixTreeImpl(interval, sequence, alphabetSize, embeddedSuffixTable,
                    inverseSuffixTable, embeddingSuffixTable, lcpTable, extendedLcpTable, childTable);
        } else
            return null;
    }
}
TOP

Related Classes of com.googlecode.gaal.suffix.impl.EmbeddedSuffixTreeImpl$EmbeddedIntervalImpl

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.