Package edu.cmu.sphinx.linguist.dictionary

Examples of edu.cmu.sphinx.linguist.dictionary.Word


        return probDepth;
    }

    private ProbDepth getNGramProbDepth(WordSequence wordSequence) {
        int numberWords = wordSequence.size();
        Word firstWord = wordSequence.getWord(0);

        // back off to a shorter word sequence when this n-gram order is not
        // in the model or the first word has no unigram entry
        if (loader.getNumberNGrams(numberWords) == 0 || !hasUnigram(firstWord))
            return getNGramProbDepth(wordSequence.getNewest());

        if (numberWords < 2) {
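
The fragment above implements a back-off: when the requested n-gram order is not covered by the loaded model, or the first word has no unigram entry, it recurses on wordSequence.getNewest(), i.e. a shorter word sequence. A minimal, self-contained sketch of the same idea over a plain map (the class name and the table are hypothetical, not Sphinx API):

    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class BackoffSketch {
        // hypothetical table mapping a joined n-gram string to its log probability
        private final Map<String, Double> table = new HashMap<String, Double>();

        /** Score a word sequence, backing off to shorter sequences when needed. */
        double logProb(List<String> words) {
            if (words.isEmpty()) {
                return Double.NEGATIVE_INFINITY;        // nothing left to score
            }
            Double p = table.get(String.join(" ", words));
            if (p != null) {
                return p;                               // this n-gram is in the table
            }
            // back off: drop one word and score the shorter sequence
            return logProb(words.subList(1, words.size()));
        }
    }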


     *
     * @param wordSequence the unigram word sequence
     * @return the unigram probability
     */
    private ProbDepth getUnigramProbDepth(WordSequence wordSequence) {
        Word unigram = wordSequence.getWord(0);
        UnigramProbability unigramProb = getUnigram(unigram);

        if (unigramProb == null)
            throw new Error("Unigram not in LM: " + unigram);


     *
     * @param w the word to look up
     * @return true if the word has a unigram entry in this language model
     */
    public boolean hasWord(Word w) {
        return (unigramIDMap.get(new Word(w.toString(), null, false)) != null);
    }
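
hasWord above builds a throwaway Word(w.toString(), null, false) purely as a map key, which suggests the lookup is keyed on the word's spelling alone. A small sketch of the same lookup-by-spelling pattern, keyed explicitly on the String spelling so it does not depend on how Word implements equals/hashCode (the map is hypothetical):

    import java.util.HashMap;
    import java.util.Map;

    import edu.cmu.sphinx.linguist.dictionary.Word;

    public class SpellingLookupSketch {
        // hypothetical map from spelling to the canonical Word object
        private final Map<String, Word> words = new HashMap<String, Word>();

        void register(Word w) {
            words.put(w.toString(), w);           // the spelling is the key
        }

        boolean hasWord(Word w) {
            return words.containsKey(w.toString());
        }
    }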

        // mark this node as visited before collecting its initial units
        visitedNodes.add(thisNode);
        if (thisNode.isFinalNode()) {
            unitSet.add(UnitManager.SILENCE);
        } else if (!thisNode.isEmpty()) {
            Word word = thisNode.getWord();
            Pronunciation[] pronunciations = word.getPronunciations();
            for (Pronunciation pronunciation : pronunciations) {
                unitSet.add(pronunciation.getUnits()[0]);
            }
        } else {
            GrammarArc[] arcs = thisNode.getSuccessors();
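
The branch above gathers the first unit of every pronunciation of the node's word, adding silence for final nodes and recursing over successor arcs for empty nodes. A hedged sketch of just the word-to-first-units step, using only the accessors the fragment itself relies on:

    import java.util.HashSet;
    import java.util.Set;

    import edu.cmu.sphinx.linguist.acoustic.Unit;
    import edu.cmu.sphinx.linguist.dictionary.Pronunciation;
    import edu.cmu.sphinx.linguist.dictionary.Word;

    public class FirstUnitsSketch {
        /** Collect the first unit of each pronunciation of the given word. */
        static Set<Unit> firstUnitsOf(Word word) {
            Set<Unit> units = new HashSet<Unit>();
            for (Pronunciation pronunciation : word.getPronunciations()) {
                Unit[] sequence = pronunciation.getUnits();
                if (sequence.length > 0) {
                    units.add(sequence[0]);       // only the first unit is needed here
                }
            }
            return units;
        }
    }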

            if (isFinal()) {
                arcs = EMPTY_ARCS;
            } else if (node.isEmpty()) {
                arcs = getNextGrammarStates(lc, nextBaseID);
            } else {
                Word word = node.getWord();
                Pronunciation[] pronunciations = word.getPronunciations();

                // This can potentially speed up computation:
                // pronunciations = filter(pronunciations, nextBaseID);

                SearchStateArc[] nextArcs = new SearchStateArc[pronunciations.length];

     * Returns the word associated with this state
     *
     * @return the word associated with this state, or null if there is no word associated with this state.
     */
    public Word getAssociatedWord() {
        Word word = null;
        SentenceHMMState state = this;

        while (state != null && !(state instanceof WordState)) {
            state = state.getParent();
        }
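
The tail of the method is cut off above; presumably, once the loop stops at a WordState, the word is taken from that state and returned (or null when no WordState ancestor exists). A hedged guess at the continuation, with the WordState accessor treated as an assumption rather than confirmed API:

        // assumed continuation of getAssociatedWord()
        if (state != null) {
            word = ((WordState) state).getWord();   // assumed WordState accessor
        }
        return word;
    }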

            // remove word duplicates
            for (Iterator<Token> i = tokenList.iterator(); i.hasNext();) {
                Token token = i.next();
                WordSearchState wordState = (WordSearchState)token.getSearchState();

                Word word = wordState.getPronunciation().getWord();

                // allow at most maxFiller filler words
                if (maxFiller > 0) {
                    if (word.isFiller()) {
                        if (fillerCount < maxFiller) {
                            fillerCount++;
                        } else {
                            i.remove();
                            continue;
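
In this loop, word.isFiller() is used to cap the number of filler words (silence and other non-lexical tokens) that survive pruning. A self-contained sketch of the same capping logic over a plain list of Words; apart from Word.isFiller(), nothing here is Sphinx-specific:

    import java.util.Iterator;
    import java.util.List;

    import edu.cmu.sphinx.linguist.dictionary.Word;

    public class FillerCapSketch {
        /** Keep at most maxFiller filler words, removing the excess in place. */
        static void capFillers(List<Word> words, int maxFiller) {
            if (maxFiller <= 0) {
                return;                           // no cap requested
            }
            int fillerCount = 0;
            for (Iterator<Word> i = words.iterator(); i.hasNext();) {
                Word word = i.next();
                if (word.isFiller()) {
                    if (fillerCount < maxFiller) {
                        fillerCount++;            // still under the cap
                    } else {
                        i.remove();               // over the cap: drop this filler
                    }
                }
            }
        }
    }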

     * @param w the word
     * @param confidence the confidence for this word
     */
    public WordResult(String w, double confidence) {
        Pronunciation[] pros = {Pronunciation.UNKNOWN};
        word = new Word(w, pros, false);
        timeFrame = TimeFrame.NULL;
        this.confidence = confidence;
        this.score = LogMath.LOG_ZERO;
    }
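
This constructor wraps a plain string in a Word with an UNKNOWN pronunciation and a NULL time frame, which is convenient when a hypothesis comes from outside the decoder (for example, a rescoring pass). A short, hedged usage sketch; the word strings and confidences are made up:

    import java.util.ArrayList;
    import java.util.List;

    import edu.cmu.sphinx.result.WordResult;

    public class WordResultFromStrings {
        /** Wrap plain strings and confidences as WordResult objects. */
        static List<WordResult> fromStrings(String[] words, double[] confidences) {
            List<WordResult> results = new ArrayList<WordResult>();
            for (int i = 0; i < words.length; i++) {
                // each entry gets Pronunciation.UNKNOWN and TimeFrame.NULL internally
                results.add(new WordResult(words[i], confidences[i]));
            }
            return results;
        }
    }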

            Data prevFeature = prevWordFirstFeature;
            token = token.getPredecessor();

            while (token != null) {
                if (token.isWord()) {
                    Word word = token.getWord();
                    if (withFillers || !word.isFiller()) {
                        TimeFrame timeFrame =
                                new TimeFrame(
                                        ((FloatData) prevFeature)
                                                .getCollectTime(),
                                        ((FloatData) prevWordFirstFeature)
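
The loop above derives a word's TimeFrame from the collect times of two stored features. A hedged sketch of just that step, assuming the FloatData.getCollectTime() accessor and the two-argument TimeFrame constructor that the fragment itself uses (import paths are assumed):

    import edu.cmu.sphinx.frontend.FloatData;
    import edu.cmu.sphinx.util.TimeFrame;

    public class TimeFrameSketch {
        /** Build a TimeFrame spanning the collect times of two features. */
        static TimeFrame spanOf(FloatData firstFeature, FloatData lastFeature) {
            return new TimeFrame(firstFeature.getCollectTime(),
                                 lastFeature.getCollectTime());
        }
    }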

     * timestamp. This method assumes that the word tokens come after the unit and hmm tokens.
     *
     * @return the string of words, each with the starting sample number
     */
    private List<WordResult> getTimedWordTokenLastPath(Token token, boolean withFillers) {
        Word word = null;
        Data lastFeature = null;
        Data lastWordFirstFeature = null;
        List<WordResult> result = new ArrayList<WordResult>();

        while (token != null) {
            if (token.isWord()) {
                if (word != null && lastFeature != null) {
                    if (withFillers || !word.isFiller()) {
                        TimeFrame timeFrame = new TimeFrame(((FloatData) lastFeature).getCollectTime(),
                                ((FloatData) lastWordFirstFeature).getCollectTime());
                        result.add(new WordResult(word, timeFrame, token.getScore(), 1.));
                    }
                    word = token.getWord();
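
Each emitted entry combines a Word, a TimeFrame, the token score, and a fixed confidence of 1 via the four-argument WordResult constructor used above. A hedged sketch of that construction step in isolation (the spelling, times, and score are made-up inputs; import paths are assumed):

    import edu.cmu.sphinx.linguist.dictionary.Pronunciation;
    import edu.cmu.sphinx.linguist.dictionary.Word;
    import edu.cmu.sphinx.result.WordResult;
    import edu.cmu.sphinx.util.TimeFrame;

    public class TimedWordSketch {
        /** Build a timed WordResult for a word known only by its spelling. */
        static WordResult timedWord(String spelling, long start, long end, double score) {
            Word word = new Word(spelling, new Pronunciation[] {Pronunciation.UNKNOWN}, false);
            TimeFrame frame = new TimeFrame(start, end);
            return new WordResult(word, frame, score, 1.0);   // confidence fixed at 1, as above
        }
    }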
