Package com.googlecode.gaal.data.api

Examples of com.googlecode.gaal.data.api.IntSequence


        }
        return repeatsMap;
    }

    private static List<String> toDocumentList(Corpus<String> corpus, Map<Integer, Integer> repeatsMap) {
        IntSequence sequence = corpus.sequence();
        List<String> documents = new ArrayList<String>();
        StringBuilder sb = new StringBuilder();
        int prevRepeatNumber = -1;
        int docNumber = 0;
        boolean isFirst = true;
        boolean isBlockStart = false;
        for (int i = 0; i < sequence.size(); i++) {
            Integer repeatNumber = repeatsMap.get(i);
            if (prevRepeatNumber != -1 && (repeatNumber == null || prevRepeatNumber != repeatNumber)) {
                sb.append("}");
            }
            if (repeatNumber != null && prevRepeatNumber != repeatNumber) {
                sb.append(String.format(" {\\color{%s}", TikzConstants.VECTOR_STYLES[repeatNumber].getColour()));
                isBlockStart = true;
            }
            if (repeatNumber != null) {
                prevRepeatNumber = repeatNumber;
            } else {
                prevRepeatNumber = -1;
            }
            int docId = corpus.getDocumentId(i);
            if (docNumber != docId) {
                docNumber = docId;
                documents.add(sb.toString());
                sb = new StringBuilder();
            }
            int symbol = sequence.get(i);
            String token = corpus.toToken(symbol);
            if (!corpus.isSeparator(symbol)) {
                if (isFirst) {
                    isFirst = false;
                } else if (!token.equals(",") && !token.equals(".") && !isBlockStart) {
View Full Code Here


        documents.add(sb.toString());
        return documents;
    }

    private static String toDocument(Corpus<String> corpus) {
        IntSequence sequence = corpus.sequence();
        StringBuilder sb = new StringBuilder();
        boolean isFirst = true;
        for (int i = 0; i < sequence.size(); i++) {
            int symbol = sequence.get(i);
            String token = corpus.toToken(symbol);
            if (!corpus.isSeparator(symbol)) {
                if (isFirst) {
                    isFirst = false;
                } else if (!token.equals(",") && !token.equals(".")) {
View Full Code Here

        return sb.toString();
    }

    private static List<Map<String, Integer>> toTermMapList(Corpus<String> corpus) {
        List<Map<String, Integer>> documentList = new ArrayList<Map<String, Integer>>();
        IntSequence sequence = corpus.sequence();
        Map<String, Integer> termMap = null;
        int docNumber = -1;
        boolean isFirst = true;
        for (int i = 0; i < sequence.size(); i++) {
            int docId = corpus.getDocumentId(i);
            if (docNumber != docId) {
                docNumber = docId;
                if (termMap != null)
                    documentList.add(termMap);
                termMap = new TreeMap<String, Integer>();
            }
            int symbol = sequence.get(i);
            String token = corpus.toToken(symbol);
            if (!corpus.isSeparator(symbol)) {
                if (isFirst) {
                    isFirst = false;
                } else if (!token.equals(",") && !token.equals(".")) {
View Full Code Here

    }

    private <E extends BinaryNode<E>> int traverse(IntervalSet<E> bwtSet, SuffixArray sa, E interval) {
        if (interval.isTerminal()) {
            int loc = sa.getSuffixTable()[interval.left()];
            IntSequence sequence = sa.getSequence();
            if (loc == 0)
                return sequence.get(sequence.size() - 1);
            else
                return sequence.get(loc - 1);
        } else {
            E left = interval.leftChild();
            E right = interval.rightChild();

            int leftValue = traverse(bwtSet, sa, left);
View Full Code Here

     * @return the embedded suffix tree for the given interval, null if such
     *         tree can not be constructed
     */
    public static <S> EmbeddedSuffixTree create(SuffixArray sa, Interval interval, int windowSize,
            SymbolTable<S> symbolTable) {
        IntSequence sequence = sa.getSequence();
        int[] suffixTable = sa.getSuffixTable();
        int[] inverseSuffixTable = sa.getInverseSuffixTable();
        SortedMap<Integer, Integer> embeddedSuffixTableIndices = new TreeMap<Integer, Integer>();
        int lcp = interval.lcp();
        IntSequence indices = interval.indices();
        for (int i = 0; i < interval.size(); i++) {
            int start = indices.get(i) + lcp;
            for (int j = start; j < start + windowSize && j < inverseSuffixTable.length
                    && !symbolTable.isSeparator(sequence.get(j)); j++) {
                Integer startIndex = embeddedSuffixTableIndices.get(inverseSuffixTable[j]);
                if (startIndex == null || startIndex < start) {
                    embeddedSuffixTableIndices.put(inverseSuffixTable[j], start);
View Full Code Here

    public String toString(Iterator<IntSequence> iterator, String delimiter) {
        StringBuilder buffer = new StringBuilder();
        boolean isFirst = true;
        buffer.append('{');
        while (iterator.hasNext()) {
            IntSequence sequence = iterator.next();
            if (isFirst)
                isFirst = false;
            else
                buffer.append(',');
            buffer.append(String.format("\"%s\"", join(iterator(sequence), delimiter)));
View Full Code Here

        }
    }

    public static <S> Vector toVector(Interval interval, Corpus<S> corpus) {
        Vector vector = new SparseVector();
        IntSequence indices = interval.indices();
        for (int i = 0; i < indices.size(); i++) {
            int start = indices.get(i);
            int documentId = corpus.getDocumentId(start);
            vector.add(documentId, 1);
        }
        return vector;
    }
View Full Code Here

            return true;
        if (obj == null)
            return false;
        if (getClass() != obj.getClass())
            return false;
        IntSequence other = (IntSequence) obj;
        return compareTo(other) == 0;
    }
View Full Code Here

        };
    }

    public static Vector toVector(Interval interval, Corpus<String> corpus) {
        Vector vector = new SparseVector();
        IntSequence indices = interval.indices();
        for (int i = 0; i < indices.size(); i++) {
            int start = indices.get(i);
            int documentId = corpus.getDocumentId(start);
            vector.add(documentId, 1);
        }
        return vector;
    }
View Full Code Here

        int prevSuffix = -1;
        boolean hasCommonSuffix = true; // all the fillers share a common
                                        // suffix
        Iterator<IntSequence> fillerIterator = embeddedInterval.fillerIterator();
        while (fillerIterator.hasNext()) {
            IntSequence filler =  fillerIterator.next();
            if (prevPrefix != -1 && hasCommonPrefix && (filler.size() == 0 || filler.get(0) != prevPrefix)) {
                hasCommonPrefix = false;
            }
            if (prevSuffix != -1 && hasCommonSuffix
                    && (filler.size() == 0 || filler.get(filler.size() - 1) != prevSuffix)) {
                hasCommonSuffix = false;
            }
            if (filler.size() > 0) {
                prevPrefix = filler.get(0);
                prevSuffix = filler.get(filler.size() - 1);
                nonEmptyFillerCount++;
            }
        }
        return (!hasCommonPrefix && !hasCommonSuffix && leftRightContextRatio > minLeftRightContextRatio && nonEmptyFillerCount > minNonEmptyFillerCount);
    }
View Full Code Here

TOP

Related Classes of com.googlecode.gaal.data.api.IntSequence

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.