package com.googlecode.gaal.suffix.impl;
import java.util.Iterator;
import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.SortedMap;
import java.util.TreeMap;
import com.googlecode.gaal.data.api.IntSequence;
import com.googlecode.gaal.data.api.Multiset;
import com.googlecode.gaal.data.api.SymbolTable;
import com.googlecode.gaal.data.impl.ArraySequence;
import com.googlecode.gaal.data.impl.TreeMultiset;
import com.googlecode.gaal.suffix.algorithm.impl.ExtendedLcpTableBuilderImpl;
import com.googlecode.gaal.suffix.algorithm.impl.KimChildTableBuilder;
import com.googlecode.gaal.suffix.algorithm.impl.NaiveLcpTableBuilder;
import com.googlecode.gaal.suffix.api.EmbeddedSuffixTree;
import com.googlecode.gaal.suffix.api.EmbeddedSuffixTree.EmbeddedInterval;
import com.googlecode.gaal.suffix.api.EnhancedSuffixArray;
import com.googlecode.gaal.suffix.api.SuffixArray;
public class EmbeddedSuffixTreeImpl extends AbstractBinaryIntervalTree<EmbeddedInterval> implements EmbeddedSuffixTree {
private final EmbeddedInterval top;
private final int[] embeddingSuffixTable;
private final Interval interval;
private EmbeddedSuffixTreeImpl(Interval interval, IntSequence sequence, int alphabetSize, int[] suffixTable,
int[] inverseSuffixTable, int[] embeddingSuffixTable, int[] lcpTable, int[] extendedLcpTable,
int[] childTable) {
super(sequence, alphabetSize, suffixTable, lcpTable, extendedLcpTable, childTable);
top = new EmbeddedIntervalImpl(0, suffixTable.length - 1);
this.interval = interval;
this.embeddingSuffixTable = embeddingSuffixTable;
this.inverseSuffixTable = inverseSuffixTable;
}
@Override
public EmbeddedInterval top() {
return top;
}
protected class EmbeddedIntervalImpl extends AbstractBinaryInterval implements EmbeddedInterval {
private Multiset<IntSequence> fillerSet;
protected EmbeddedIntervalImpl(int left, int right) {
super(left, right);
}
@Override
public EmbeddedInterval leftChild() {
return new EmbeddedIntervalImpl(left, middle(left, right) - 1);
}
@Override
public EmbeddedInterval rightChild() {
return new EmbeddedIntervalImpl(middle(left, right), right);
}
@Override
public Iterator<IntSequence> fillerIterator() {
return new Iterator<IntSequence>() {
private int index = left;
private int lcp = interval.lcp();
@Override
public boolean hasNext() {
return index <= right;
}
@Override
public IntSequence next() {
if (index > right)
throw new NoSuchElementException();
else {
int end = suffixTable[index];
return sequence.subSequence(embeddingSuffixTable[index++] + lcp, end);
}
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
}
@Override
public Multiset<IntSequence> fillerSet() {
if (fillerSet == null) {
fillerSet = new TreeMultiset<IntSequence>(fillerIterator());
}
return fillerSet;
}
@Override
public Interval getEmbeddingInterval() {
return interval;
}
@Override
public IntSequence embeddingIndices() {
return new ArraySequence(embeddingSuffixTable, left, right + 1);
}
}
/**
* a factory method to create an embedded suffix tree
*
* @param <S>
* - alphabet symbol type
* @param sa
* - the top level suffix array
* @param interval
* - the embedding interval
* @param windowSize
* - the window size
* @param symbolTable
* - the symbol table
* @return the embedded suffix tree for the given interval, null if such
* tree can not be constructed
*/
public static <S> EmbeddedSuffixTree create(SuffixArray sa, Interval interval, int windowSize,
SymbolTable<S> symbolTable) {
IntSequence sequence = sa.getSequence();
int[] suffixTable = sa.getSuffixTable();
int[] inverseSuffixTable = sa.getInverseSuffixTable();
SortedMap<Integer, Integer> embeddedSuffixTableIndices = new TreeMap<Integer, Integer>();
int lcp = interval.lcp();
IntSequence indices = interval.indices();
for (int i = 0; i < interval.size(); i++) {
int start = indices.get(i) + lcp;
for (int j = start; j < start + windowSize && j < inverseSuffixTable.length
&& !symbolTable.isSeparator(sequence.get(j)); j++) {
Integer startIndex = embeddedSuffixTableIndices.get(inverseSuffixTable[j]);
if (startIndex == null || startIndex < start) {
embeddedSuffixTableIndices.put(inverseSuffixTable[j], start);
}
}
}
int[] embeddedSuffixTable = new int[embeddedSuffixTableIndices.size()];
int[] embeddingSuffixTable = new int[embeddedSuffixTableIndices.size()];
int index = 0;
for (Entry<Integer, Integer> entry : embeddedSuffixTableIndices.entrySet()) {
embeddedSuffixTable[index] = suffixTable[entry.getKey()];
embeddingSuffixTable[index] = entry.getValue() - lcp;
index++;
}
if (embeddedSuffixTable.length > 0) {
int alphabetSize = symbolTable.alphabetSize();
int[] lcpTable = new NaiveLcpTableBuilder().buildLcpTable(embeddedSuffixTable, sequence);
int[] childTable = new KimChildTableBuilder().buildChildTable(lcpTable);
EnhancedSuffixArray esa = new EnhancedSuffixArrayBase(sequence, alphabetSize, embeddedSuffixTable,
lcpTable, childTable);
int[] extendedLcpTable = new ExtendedLcpTableBuilderImpl().buildExtendedLcpTable(esa);
childTable = new KimChildTableBuilder().buildChildTable(extendedLcpTable);
return new EmbeddedSuffixTreeImpl(interval, sequence, alphabetSize, embeddedSuffixTable,
inverseSuffixTable, embeddingSuffixTable, lcpTable, extendedLcpTable, childTable);
} else
return null;
}
}