Package org.apache.uima.examples.tagger.test

Source Code of org.apache.uima.examples.tagger.test.TaggerTest

/*
*Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
package org.apache.uima.examples.tagger.test;

import junit.framework.TestCase;
import java.util.*;
import java.util.Map.Entry;

import org.apache.uima.examples.tagger.HMMTagger;
import org.apache.uima.examples.tagger.Viterbi;

//This test was run with JUnit3

public class TaggerTest extends TestCase {

  private HMMTagger hmm;

  private List<String> sent; // sentence

  private List<String> gold_standard;

  private List<String> tagger_output;

  /**
   * Set up the test fixture
   */

  protected void setUp() {

    hmm = new HMMTagger();
    gold_standard = new ArrayList<String>();
    sent = new ArrayList<String>();
    tagger_output = new ArrayList<String>();

  }

  /**
   * Tests tagging for German.
   *
   */
  @SuppressWarnings("unchecked")
  public void testGermanTagger() {

    System.out.println("Tesing German Model... ");
    List POS = new ArrayList();

    try {
      hmm.my_model = HMMTagger.get_model("resources/german/TuebaModel.dat");
    } catch (Exception e) {
      System.out.println("Model which is supposed to be used for testing does not exist");
    }
    System.out.println(hmm.my_model.word_probs.size() + " distinct words in the model");

    Iterator<Entry<String, Map<String, Double>>> keyValuePairs = hmm.my_model.word_probs.entrySet()
            .iterator(); // iterate over words

    for (int i = 0; i < hmm.my_model.word_probs.size(); i++) {
      Map.Entry<String, Map<String, Double>> entry = (Map.Entry<String, Map<String, Double>>) keyValuePairs
              .next();
      Object key = entry.getKey();
      Map<String, Double> pos = (Map) hmm.my_model.word_probs.get(key); // map of possible pos-s of
                                                                        // the word
      Object[] pos_s = pos.entrySet().toArray(); // for iteration over possible pos_s

      for (int u = 0; u < pos_s.length; u++) {

        Map.Entry<String, Map<String, Double>> entry2 = (Map.Entry<String, Map<String, Double>>) pos_s[u];
        Object key2 = entry2.getKey(); // pos of a word
        if (POS.contains(key2)) {
          continue;

        } else {
          POS.add(key2);
        }
      }

    }
    Collections.sort(POS);
    System.out.println("Number of part-of-speech tags used: " + POS.size());
    System.out.println("These are:  " + POS);

    System.out.println("Testing German trigram tagger..");

    sent.add("Jerry");
    sent.add("liebt");
    sent.add("Wansley");
    sent.add(".");

    System.out.println(sent);

    hmm.N = 3;
   // hmm.END_OF_SENT_TAG = "$.";

    String[] out = new String[] { "NE", "VVFIN", "NE", "$." };
    gold_standard.addAll(Arrays.asList(out));
    tagger_output = Viterbi.process(hmm.N, sent, hmm.my_model.suffix_tree,
            hmm.my_model.suffix_tree_capitalized, hmm.my_model.transition_probs,
            hmm.my_model.word_probs, hmm.my_model.lambdas2, hmm.my_model.lambdas3,
            hmm.my_model.theta);
    System.out.println("expected: " + gold_standard);
    System.out.println("tagger output: " + tagger_output);
    assertEquals(gold_standard, tagger_output);
    System.out.println("Very Good!");
    System.out.println("==========================================================");
  }

  /**
   * Tests English trigram tagger
   *
   */
  @SuppressWarnings("unchecked")
  public void testEnglishTagger() {

    System.out.println("Tesing English Model... ");
    List POS = new ArrayList();

    try {
      hmm.my_model = HMMTagger.get_model("resources/english/BrownModel.dat");
    } catch (Exception e) {
      System.out.println("Model which is supposed to be used for testing does not exist");
    }
    System.out.println(hmm.my_model.word_probs.size() + " distinct words in the model");

    Iterator<Entry<String, Map<String, Double>>> keyValuePairs = hmm.my_model.word_probs.entrySet()
            .iterator(); // iterate over words

    for (int i = 0; i < hmm.my_model.word_probs.size(); i++) {
      Map.Entry<String, Map<String, Double>> entry = (Map.Entry<String, Map<String, Double>>) keyValuePairs
              .next();
      Object key = entry.getKey();
      Map<String, Double> pos = (Map) hmm.my_model.word_probs.get(key); // map of possible pos-s of
                                                                        // the word
      Object[] pos_s = pos.entrySet().toArray(); // for iteration over possible pos_s

      for (int u = 0; u < pos_s.length; u++) {

        Map.Entry<String, Map<String, Double>> entry2 = (Map.Entry<String, Map<String, Double>>) pos_s[u];
        Object key2 = entry2.getKey(); // pos of a word
        if (POS.contains(key2)) {
          continue;

        } else {
          POS.add(key2);
        }
      }

    }
    Collections.sort(POS);
    System.out.println("Number of part-of-speech tags used: " + POS.size());
    System.out.println("These are:  " + POS);

    System.out.println("Testing English trigram tagger...");

    sent.add("Jerry");
    sent.add("loves");
    sent.add("Wansley");
    sent.add(".");

    System.out.println(sent);

    hmm.N = 3;
//   hmm.END_OF_SENT_TAG = "$.";

    String[] out = new String[] { "np", "vbz", "np", "." };
    gold_standard.addAll(Arrays.asList(out));
    tagger_output = Viterbi.process(hmm.N, sent, hmm.my_model.suffix_tree,
            hmm.my_model.suffix_tree_capitalized, hmm.my_model.transition_probs,
            hmm.my_model.word_probs, hmm.my_model.lambdas2, hmm.my_model.lambdas3,
            hmm.my_model.theta);
    System.out.println("expected: " + gold_standard);
    System.out.println("tagger output: " + tagger_output);
    assertEquals(gold_standard, tagger_output);
    System.out.println("Very Good!");
  }

}
TOP

Related Classes of org.apache.uima.examples.tagger.test.TaggerTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.