Package com.googlecode.gaal.cli

Source Code of com.googlecode.gaal.cli.Test

package com.googlecode.gaal.cli;

import java.io.IOException;
import java.io.StringReader;
import java.util.Iterator;

import com.googlecode.gaal.data.api.Corpus;
import com.googlecode.gaal.data.api.IntSequence;
import com.googlecode.gaal.data.api.SymbolTable;
import com.googlecode.gaal.data.impl.AbstractCorpus;
import com.googlecode.gaal.data.impl.TreeMapCorpus;
import com.googlecode.gaal.preprocess.api.Tokenizer;
import com.googlecode.gaal.preprocess.impl.LowerCaseNormalizer;
import com.googlecode.gaal.preprocess.impl.RegexTokenizer;
import com.googlecode.gaal.suffix.api.IntervalTree;
import com.googlecode.gaal.suffix.api.IntervalTree.Interval;
import com.googlecode.gaal.suffix.api.LinearizedSuffixTree;

public class Test {

    public static void main(String[] args) throws IOException {

        StringReader sr = new StringReader("Mining␣Engineering#");
        String[] alphabet = { null, "e", "g", "i", "m", "n", "r", "␣", "#" };
        // StringReader sr = new StringReader("cdabcdabcdcdabcd#"); // Fibonacci
        // string
        // String[] alphabet = { null, "a", "b","c", "d", "#" };
        // StringReader sr = new StringReader("yamātārājabhānasalagā#"); //De
        // Bruijn sequence
        // String[] alphabet = { null, "a", "ā", "b", "g", "h", "j", "l", "m",
        // "n", "r", "s", "t", "y", "#" };
        // StringReader sr = new StringReader("caggtcagtcacggtatca#");
        // String[] alphabet = { null, "a", "c", "g", "t", "#" };

        Tokenizer<String> tokenizer = new RegexTokenizer(sr, "[\\W\\w]", new LowerCaseNormalizer());
        Corpus<String> corpus = new TreeMapCorpus(tokenizer, alphabet);
        System.out.println("alphabet size " + corpus.alphabetSize());
        System.out.println("text size " + corpus.sequence().size());
        System.out.println("text " + corpus.sequence());
        System.out.println("alphabet " + corpus.alphabet());
        Iterator<String> iter = corpus.iterator(corpus.sequence());
        while (iter.hasNext())
            System.out.print(iter.next());
        System.out.println();

        Iterator<String> revIter = corpus.iterator(corpus.sequence().reverse());
        while (revIter.hasNext())
            System.out.print(revIter.next());
        System.out.println();

        // LinearizedSuffixTree sa = new KimLinearizedSuffixTree(
        // corpus.sequence(), corpus.alphabetSize());
        // LinearizedSuffixTree pa = new
        // KimLinearizedSuffixTree(corpus.sequence()
        // .reverse(), corpus.alphabetSize());
        //
        // System.out.println(Arrays.toString(sa.getSuffixTable()));
        // System.out.println(Arrays.toString(sa.getLcpTable()));
        // System.out.println(Arrays.toString(sa.getChildTable()));
        // System.out.println(Arrays.toString(sa.getExtendedLcpTable()));
        // System.out.println(Arrays.toString(sa.getNewChildTable()));
        // System.out.println(suffixToString(sa.getSuffixTable(),
        // corpus.sequence(), corpus));
        // System.out.println(Arrays.toString(pa.getSuffixTable()));
        // System.out.println(Arrays.toString(pa.getLcpTable()));
        // System.out.println(Arrays.toString(pa.getChildTable()));
        // System.out.println(Arrays.toString(pa.getExtendedLcpTable()));
        // System.out.println(Arrays.toString(pa.getNewChildTable()));
        // System.out.println(suffixToString(pa.getSuffixTable(), corpus
        // .sequence().reverse(), corpus));

        // System.out.println(Visualizer.visualizeLST(sa, corpus.sequence(),
        // corpus));
        // System.out.println(Visualizer.visualizeSequence(corpus.sequence(),
        // corpus));
        // System.out.println(Visualizer.visualizeSuffixes(corpus.sequence(),
        // corpus));
        // System.out.println(Visualizer.visualizeSuffixTable(sa,
        // corpus.sequence(), corpus));
        // System.out.println(Visualizer.visualizeLcpTable(sa,
        // corpus.sequence(), corpus));
        // System.out.println(LaTeXVisualizer.visualizeLST(sa,
        // corpus.sequence(), corpus));

        // Interval interval = sa.search(new ArraySequence(new int[] { 1, 3 }));
        // printInterval(interval, sa, corpus.sequence(), corpus);

        // testPreOrderTraversal(new LinearizedSuffixTreeImpl(corpus.sequence(),
        // corpus.alphabetSize()));
        // testPreOrderTraversal(new EnhancedSuffixArrayImpl(
        // corpus.sequence(), corpus.alphabetSize()));
        // testFindMaximalIntervals(corpus);
        // System.out.println();
    }

    public static <E extends Interval> void testPreOrderTraversal(IntervalTree<E> tree) throws IOException {
        Iterator<E> iterator = tree.preorderIterator();

        while (iterator.hasNext()) {
            System.out.println(iterator.next());
        }
    }

    public static <T> String suffixToString(int[] suffixTable, IntSequence sequence, SymbolTable<T> symbolTable) {
        StringBuffer sb = new StringBuffer();
        for (int i = 1; i < suffixTable.length; i++) {
            sb.append(
                    AbstractCorpus.join(symbolTable.iterator(sequence.subSequence(suffixTable[i], sequence.size())),
                            " ")).append("\n");
        }
        return sb.toString();
    }

    public static <T> void printInterval(Interval interval, LinearizedSuffixTree lst, IntSequence sequence,
            SymbolTable<T> symbolTable) {
        for (int i = 1; i < interval.size(); i++) {
            System.out.println(AbstractCorpus.join(
                    symbolTable.iterator(sequence.subSequence(lst.getSuffixTable()[i], sequence.size())), ""));
        }
    }
}
TOP

Related Classes of com.googlecode.gaal.cli.Test

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.