package com.googlecode.gaal.cli;
import java.io.IOException;
import java.io.StringReader;
import java.util.Iterator;
import com.googlecode.gaal.data.api.Corpus;
import com.googlecode.gaal.data.api.IntSequence;
import com.googlecode.gaal.data.api.SymbolTable;
import com.googlecode.gaal.data.impl.AbstractCorpus;
import com.googlecode.gaal.data.impl.TreeMapCorpus;
import com.googlecode.gaal.preprocess.api.Tokenizer;
import com.googlecode.gaal.preprocess.impl.LowerCaseNormalizer;
import com.googlecode.gaal.preprocess.impl.RegexTokenizer;
import com.googlecode.gaal.suffix.api.IntervalTree;
import com.googlecode.gaal.suffix.api.IntervalTree.Interval;
import com.googlecode.gaal.suffix.api.LinearizedSuffixTree;
/**
 * Ad-hoc command-line driver for experimenting with corpora and suffix
 * structures on small hard-coded sample strings.
 *
 * <p>{@link #main(String[])} builds a {@link Corpus} from a sample text and
 * prints some of its properties. The remaining static methods are helpers
 * that are only referenced from the commented-out experiments in
 * {@code main}.
 */
public class Test {

    /**
     * Builds a corpus from the hard-coded sample text and prints its alphabet
     * size, text size, integer sequence, alphabet, and the symbols of the
     * sequence in forward and in reversed order.
     *
     * @param args command-line arguments; not used
     * @throws IOException declared for the underlying reader/tokenizer
     */
    public static void main(String[] args) throws IOException {
        StringReader sr = new StringReader("Mining␣Engineering#");
        String[] alphabet = { null, "e", "g", "i", "m", "n", "r", "␣", "#" };

        // Alternative sample inputs; swap one pair in for the two lines above.
        //
        // Fibonacci string:
        // StringReader sr = new StringReader("cdabcdabcdcdabcd#");
        // String[] alphabet = { null, "a", "b","c", "d", "#" };
        //
        // De Bruijn sequence:
        // StringReader sr = new StringReader("yamātārājabhānasalagā#");
        // String[] alphabet = { null, "a", "ā", "b", "g", "h", "j", "l", "m",
        //         "n", "r", "s", "t", "y", "#" };
        //
        // StringReader sr = new StringReader("caggtcagtcacggtatca#");
        // String[] alphabet = { null, "a", "c", "g", "t", "#" };

        // The pattern [\W\w] matches any single character, so the tokenizer is
        // presumably meant to yield one token per character of the input.
        Tokenizer<String> tokenizer = new RegexTokenizer(sr, "[\\W\\w]", new LowerCaseNormalizer());
        Corpus<String> corpus = new TreeMapCorpus(tokenizer, alphabet);

        System.out.println("alphabet size " + corpus.alphabetSize());
        System.out.println("text size " + corpus.sequence().size());
        System.out.println("text " + corpus.sequence());
        System.out.println("alphabet " + corpus.alphabet());

        // Print the symbols of the sequence on one line, in order.
        Iterator<String> iter = corpus.iterator(corpus.sequence());
        while (iter.hasNext()) {
            System.out.print(iter.next());
        }
        System.out.println();

        // Print the symbols of the reversed sequence on one line.
        Iterator<String> revIter = corpus.iterator(corpus.sequence().reverse());
        while (revIter.hasNext()) {
            System.out.print(revIter.next());
        }
        System.out.println();

        // Disabled experiments, kept for reference.
        //
        // LinearizedSuffixTree sa = new KimLinearizedSuffixTree(corpus.sequence(), corpus.alphabetSize());
        // LinearizedSuffixTree pa = new KimLinearizedSuffixTree(corpus.sequence().reverse(),
        //         corpus.alphabetSize());
        //
        // System.out.println(Arrays.toString(sa.getSuffixTable()));
        // System.out.println(Arrays.toString(sa.getLcpTable()));
        // System.out.println(Arrays.toString(sa.getChildTable()));
        // System.out.println(Arrays.toString(sa.getExtendedLcpTable()));
        // System.out.println(Arrays.toString(sa.getNewChildTable()));
        // System.out.println(suffixToString(sa.getSuffixTable(), corpus.sequence(), corpus));
        //
        // System.out.println(Arrays.toString(pa.getSuffixTable()));
        // System.out.println(Arrays.toString(pa.getLcpTable()));
        // System.out.println(Arrays.toString(pa.getChildTable()));
        // System.out.println(Arrays.toString(pa.getExtendedLcpTable()));
        // System.out.println(Arrays.toString(pa.getNewChildTable()));
        // System.out.println(suffixToString(pa.getSuffixTable(), corpus.sequence().reverse(), corpus));
        //
        // System.out.println(Visualizer.visualizeLST(sa, corpus.sequence(), corpus));
        // System.out.println(Visualizer.visualizeSequence(corpus.sequence(), corpus));
        // System.out.println(Visualizer.visualizeSuffixes(corpus.sequence(), corpus));
        // System.out.println(Visualizer.visualizeSuffixTable(sa, corpus.sequence(), corpus));
        // System.out.println(Visualizer.visualizeLcpTable(sa, corpus.sequence(), corpus));
        // System.out.println(LaTeXVisualizer.visualizeLST(sa, corpus.sequence(), corpus));
        //
        // Interval interval = sa.search(new ArraySequence(new int[] { 1, 3 }));
        // printInterval(interval, sa, corpus.sequence(), corpus);
        //
        // testPreOrderTraversal(new LinearizedSuffixTreeImpl(corpus.sequence(), corpus.alphabetSize()));
        // testPreOrderTraversal(new EnhancedSuffixArrayImpl(corpus.sequence(), corpus.alphabetSize()));
        // testFindMaximalIntervals(corpus);
        // System.out.println();
    }

    /**
     * Prints every interval produced by the tree's pre-order iterator, one per
     * line, using each interval's {@code toString()}.
     *
     * @param <E>  the interval type stored in the tree
     * @param tree the interval tree to traverse
     * @throws IOException declared by the signature; nothing in this method
     *                     body throws it directly
     */
    public static <E extends Interval> void testPreOrderTraversal(IntervalTree<E> tree) throws IOException {
        Iterator<E> iterator = tree.preorderIterator();
        while (iterator.hasNext()) {
            System.out.println(iterator.next());
        }
    }

    /**
     * Renders the suffixes referenced by a suffix table as text, one suffix
     * per line.
     *
     * <p>For every entry from index 1 onwards, the sub-sequence running from
     * that entry's position to the end of {@code sequence} is converted to
     * symbols through {@code symbolTable} and joined with
     * {@link AbstractCorpus#join} using {@code " "} as the separator argument.
     * Entry 0 of the table is skipped (presumably the terminator-only suffix;
     * confirm against the suffix table implementation).
     *
     * @param <T>         the symbol type of the symbol table
     * @param suffixTable start positions of the suffixes
     * @param sequence    the integer sequence the table was built over
     * @param symbolTable maps the integer sequence back to symbols
     * @return the rendered suffixes, each followed by a newline
     */
    public static <T> String suffixToString(int[] suffixTable, IntSequence sequence, SymbolTable<T> symbolTable) {
        // The builder never leaves this method, so the synchronized
        // StringBuffer is unnecessary.
        StringBuilder sb = new StringBuilder();
        for (int i = 1; i < suffixTable.length; i++) {
            IntSequence suffix = sequence.subSequence(suffixTable[i], sequence.size());
            sb.append(AbstractCorpus.join(symbolTable.iterator(suffix), " ")).append("\n");
        }
        return sb.toString();
    }

    /**
     * Prints suffixes of {@code sequence} taken from the suffix table of
     * {@code lst}, one per line, with the symbols joined using an empty
     * separator argument.
     *
     * <p>NOTE(review): the loop reads suffix table entries
     * {@code 1 .. interval.size() - 1}; only the interval's size is used, not
     * its position. If the intent is to print the suffixes that belong to the
     * interval, the index range probably needs to come from the interval's
     * bounds instead. Confirm against the {@code Interval} API before
     * changing it.
     *
     * @param <T>         the symbol type of the symbol table
     * @param interval    the interval whose size bounds the loop
     * @param lst         the suffix tree providing the suffix table
     * @param sequence    the integer sequence the tree was built over
     * @param symbolTable maps the integer sequence back to symbols
     */
    public static <T> void printInterval(Interval interval, LinearizedSuffixTree lst, IntSequence sequence,
            SymbolTable<T> symbolTable) {
        for (int i = 1; i < interval.size(); i++) {
            IntSequence suffix = sequence.subSequence(lst.getSuffixTable()[i], sequence.size());
            System.out.println(AbstractCorpus.join(symbolTable.iterator(suffix), ""));
        }
    }
}