package com.googlecode.gaal.analysis.impl;
import static org.junit.Assert.fail;
import java.util.Iterator;
import org.junit.Before;
import org.junit.Test;
import com.googlecode.gaal.data.api.Corpus;
import com.googlecode.gaal.data.api.IntSequence;
import com.googlecode.gaal.data.api.IntervalSet;
import com.googlecode.gaal.data.impl.CorpusTest;
import com.googlecode.gaal.suffix.api.IntervalTree.Interval;
import com.googlecode.gaal.suffix.api.LinearizedSuffixTree;
import com.googlecode.gaal.suffix.api.LinearizedSuffixTree.BinaryInterval;
import com.googlecode.gaal.suffix.impl.LinearizedSuffixTreeImpl;
/**
 * Cross-checks the interval sets produced by {@link LocalMaximumSetBuilder},
 * {@link NaiveDistinctBwtSetBuilder} and {@link SingletonBwtSetBuilder} against
 * straightforward reference predicates that are evaluated directly on the LCP
 * table, the suffix table and the sequence of a {@link LinearizedSuffixTree}.
 * <p>
 * Every test walks the tree in preorder, skips terminal intervals and fails on
 * the first interval for which the builder's verdict differs from the
 * reference predicate, after dumping diagnostic data to {@code System.err}.
 */
public class SetBuildersTest {

    private Corpus<String> corpus;
    private LinearizedSuffixTree lst;
    private IntervalSet<BinaryInterval> maximalSet;
    private IntervalSet<BinaryInterval> distinctBwtSet;
    private IntervalSet<BinaryInterval> singletonBwtSet;

    /**
     * Builds the suffix tree over the large test corpus and runs the three set
     * builders under test on it.
     */
    @Before
    public void setUp() throws Exception {
        this.corpus = CorpusTest.createLargeCorpus();
        this.lst = new LinearizedSuffixTreeImpl(corpus.sequence(), corpus.alphabetSize());
        maximalSet = new LocalMaximumSetBuilder().buildIntervalSet(lst);
        distinctBwtSet = new NaiveDistinctBwtSetBuilder().buildIntervalSet(lst);
        singletonBwtSet = new SingletonBwtSetBuilder().buildIntervalSet(lst);
    }

    /**
     * Membership in the set built by {@link LocalMaximumSetBuilder} must agree
     * with {@link #isMaximal(Interval)} for every non-terminal interval.
     */
    @Test
    public void testMaximal() {
        System.out.println("Maximality Test");
        Iterator<BinaryInterval> iter = lst.preorderIterator();
        while (iter.hasNext()) {
            BinaryInterval interval = iter.next();
            if (interval.isTerminal()) {
                continue;
            }
            boolean actual = maximalSet.contains(interval);
            boolean expected = isMaximal(interval);
            if (actual != expected) {
                System.err.printf("%s - got:%s, expected:%s\n", interval, actual, expected);
                int[] lcpTable = lst.getLcpTable();
                for (int i = interval.left(); i <= interval.right(); i++) {
                    // Separate the values so that multi-digit entries stay readable.
                    System.err.print(lcpTable[i] + " ");
                }
                System.err.println();
                fail("maximality mismatch");
            }
        }
    }

    /**
     * Membership in both the distinct-BWT set and the maximal set must agree
     * with {@link #isSupermaximal(Interval)} for every non-terminal interval.
     */
    @Test
    public void testSupermaximal() {
        System.out.println("Supermaximality Test");
        Iterator<BinaryInterval> iter = lst.preorderIterator();
        while (iter.hasNext()) {
            BinaryInterval interval = iter.next();
            if (interval.isTerminal()) {
                continue;
            }
            // The verdict under test is the conjunction of both sets; report
            // exactly that value so the diagnostic matches the comparison.
            boolean actual = distinctBwtSet.contains(interval) && maximalSet.contains(interval);
            boolean expected = isSupermaximal(interval);
            if (actual != expected) {
                System.err.printf("%s - got:%s, expected:%s\n", interval, actual, expected);
                printPrecedingTokens(interval);
                fail("supermaximality mismatch");
            }
        }
    }

    /**
     * Membership in the set built by {@link SingletonBwtSetBuilder} must agree
     * with {@link #isBwtSingleton(Interval)} for every non-terminal interval.
     */
    @Test
    public void testBwtSingleton() {
        System.out.println("Bwt Singleton Test");
        Iterator<BinaryInterval> iter = lst.preorderIterator();
        while (iter.hasNext()) {
            BinaryInterval interval = iter.next();
            if (interval.isTerminal()) {
                continue;
            }
            boolean actual = singletonBwtSet.contains(interval);
            boolean expected = isBwtSingleton(interval);
            if (actual != expected) {
                System.err.printf("%s - got:%s, expected:%s\n", interval, actual, expected);
                printPrecedingTokens(interval);
                fail("bwt singleton mismatch");
            }
        }
    }

    /**
     * Reference predicate for maximality, evaluated on the LCP table only.
     *
     * @param interval the interval to examine
     * @return {@code true} iff the LCP entry at the left boundary is smaller
     *         than the interval's lcp, the entry just past the right boundary
     *         (if any) differs from the lcp, and every entry strictly after the
     *         left boundary up to the right boundary equals the lcp
     */
    private boolean isMaximal(Interval interval) {
        int lcp = interval.lcp();
        int left = interval.left();
        int right = interval.right();
        int[] lcpTable = lst.getLcpTable();
        if (lcpTable[left] >= lcp || (right < lcpTable.length - 1 && lcpTable[right + 1] == lcp)) {
            return false;
        }
        for (int i = left + 1; i <= right; i++) {
            if (lcpTable[i] != lcp) {
                return false;
            }
        }
        return true;
    }

    /**
     * Reference predicate for supermaximality: the interval must be maximal and
     * the symbols preceding its suffixes must be pairwise distinct.
     *
     * @param interval the interval to examine
     * @return {@code true} iff {@link #isMaximal(Interval)} holds and no two
     *         suffixes of the interval share the same preceding symbol
     */
    private boolean isSupermaximal(Interval interval) {
        if (!isMaximal(interval)) {
            return false;
        }
        IntSequence sequence = lst.getSequence();
        int[] suffixTable = lst.getSuffixTable();
        int left = interval.left();
        int right = interval.right();
        for (int i = left; i <= right; i++) {
            int iSym = precedingSymbol(sequence, suffixTable[i]);
            // Equality is symmetric, so each unordered pair is compared once.
            for (int j = i + 1; j <= right; j++) {
                if (iSym == precedingSymbol(sequence, suffixTable[j])) {
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * Reference predicate for the singleton property: all suffixes of the
     * interval are preceded by the same symbol.
     *
     * @param interval the interval to examine
     * @return {@code true} iff every suffix in the interval has the same
     *         preceding symbol (trivially {@code true} for an empty range)
     */
    private boolean isBwtSingleton(Interval interval) {
        IntSequence sequence = lst.getSequence();
        int[] suffixTable = lst.getSuffixTable();
        int left = interval.left();
        int right = interval.right();
        if (left > right) {
            return true;
        }
        // Compare against the first symbol instead of using a sentinel value,
        // so the result does not depend on which symbol values can occur.
        int first = precedingSymbol(sequence, suffixTable[left]);
        for (int i = left + 1; i <= right; i++) {
            if (precedingSymbol(sequence, suffixTable[i]) != first) {
                return false;
            }
        }
        return true;
    }

    /**
     * Reads the symbol at the position immediately before a suffix start.
     * <p>
     * NOTE(review): the meaning of the second argument is defined by
     * {@code IntSequence.get(int, int)}, which is not visible from this file;
     * presumably it determines what is returned for the suffix that starts at
     * position 0 - confirm against the interface.
     *
     * @param sequence    the sequence to read from
     * @param suffixStart the start position of the suffix, as stored in the suffix table
     * @return the value of {@code sequence.get(suffixStart - 1, sequence.size() - 1)}
     */
    private static int precedingSymbol(IntSequence sequence, int suffixStart) {
        return sequence.get(suffixStart - 1, sequence.size() - 1);
    }

    /**
     * Prints to {@code System.err}, on one line, the corpus token of the symbol
     * preceding each suffix of the interval, each followed by a space.
     *
     * @param interval the interval whose suffixes are dumped
     */
    private void printPrecedingTokens(Interval interval) {
        int[] suffixTable = lst.getSuffixTable();
        IntSequence sequence = corpus.sequence();
        for (int i = interval.left(); i <= interval.right(); i++) {
            System.err.print(corpus.toToken(precedingSymbol(sequence, suffixTable[i])) + " ");
        }
        System.err.println();
    }
}