Package edu.cmu.sphinx.linguist.language.ngram

Source Code of edu.cmu.sphinx.linguist.language.ngram.DynamicTrigramModelTest

package edu.cmu.sphinx.linguist.language.ngram;


import static edu.cmu.sphinx.util.LogMath.getLogMath;
import static java.util.Arrays.asList;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.hamcrest.Matchers.equalTo;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;

import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import edu.cmu.sphinx.linguist.WordSequence;
import edu.cmu.sphinx.linguist.acoustic.UnitManager;
import edu.cmu.sphinx.linguist.dictionary.Dictionary;
import edu.cmu.sphinx.linguist.dictionary.FullDictionary;


public class DynamicTrigramModelTest {

    private Dictionary dictionary;

    @BeforeClass
    public void setUp() throws IOException {
        URL dictUrl =
                getClass()
                        .getResource(
                                "/edu/cmu/sphinx/models/acoustic/wsj/dict/digits.dict");
        URL noiseDictUrl =
                getClass().getResource(
                        "/edu/cmu/sphinx/models/acoustic/wsj/noisedict");

        dictionary =
                new FullDictionary(dictUrl, noiseDictUrl, null, false, null,
                        false, false, new UnitManager());
        dictionary.allocate();
    }

    @Test
    public void unigramModel() throws IOException {
        DynamicTrigramModel model = new DynamicTrigramModel(dictionary);
        model.setText(asList("one"));
        model.allocate();
        assertThat(model.getVocabulary(), contains("one"));
        assertThat(model.getProbability(new WordSequence(dictionary
                .getWord("one"))), equalTo(getLogMath().linearToLog(1)));
    }

    @Test
    public void bigramModel() throws IOException {
        DynamicTrigramModel model = new DynamicTrigramModel(dictionary);
        model.setText(asList("one", "two"));
        model.allocate();
        assertThat(model.getVocabulary(), containsInAnyOrder("one", "two"));
    }

    @Test
    public void trigramModel() throws IOException {
        DynamicTrigramModel model = new DynamicTrigramModel(dictionary);
        model.setText(asList("one", "two", "three"));
        model.allocate();
        assertThat(model.getVocabulary(),
                containsInAnyOrder("one", "two", "three"));
    }

    @Test(enabled = false)
    public void compareWithPrecomputed() throws ClassNotFoundException,
            IOException {
        DynamicTrigramModel model = new DynamicTrigramModel(dictionary);
        URL url = getClass().getResource("npr.transcript");
       
        Scanner scanner = new Scanner(new File("../words"));
        List<String> words = new ArrayList<String>();
        while (scanner.hasNext()) {
            words.add(scanner.next());
        }
        scanner.close();
        model.setText(words);
        model.allocate();
        url = getClass().getResource("npr.lm");
        SimpleNGramModel simpleModel =
                new SimpleNGramModel(url.getPath(), dictionary, 1.f, -1);
        model.allocate();
        simpleModel.allocate();
        assertThat(model.getVocabulary(), equalTo(simpleModel.getVocabulary()));
        for (WordSequence wordSequence : simpleModel.getNGrams()) {
            if (wordSequence.size() < 3)
                continue;
            System.err.println(wordSequence);
            assertThat(model.getProbability(wordSequence),
                    equalTo(simpleModel.getProbability(wordSequence)));
        }
    }
}
TOP

Related Classes of edu.cmu.sphinx.linguist.language.ngram.DynamicTrigramModelTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.