Source Code of org.apache.uima.examples.tagger.test.TaggerTest

/*
 *Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * 
 */
package org.apache.uima.examples.tagger.test;


import junit.framework.TestCase;
import java.util.*;
import java.util.Map.Entry;


import org.apache.uima.examples.tagger.HMMTagger;
import org.apache.uima.examples.tagger.Viterbi;


//This test was run with JUnit3


public class TaggerTest extends TestCase {


  private HMMTagger hmm;


  private List<String> sent; // sentence


  private List<String> gold_standard;


  private List<String> tagger_output;


  /**
   * Set up the test fixture
   */


  protected void setUp() {


    hmm = new HMMTagger();
    gold_standard = new ArrayList<String>();
    sent = new ArrayList<String>();
    tagger_output = new ArrayList<String>();


  }


  /**
   * Tests tagging for German.
   * 
   */
  @SuppressWarnings("unchecked")
  public void testGermanTagger() {


    System.out.println("Tesing German Model... ");
    List POS = new ArrayList();


    try {
      hmm.my_model = HMMTagger.get_model("resources/german/TuebaModel.dat");
    } catch (Exception e) {
      System.out.println("Model which is supposed to be used for testing does not exist");
    }
    System.out.println(hmm.my_model.word_probs.size() + " distinct words in the model");


    Iterator<Entry<String, Map<String, Double>>> keyValuePairs = hmm.my_model.word_probs.entrySet()
            .iterator(); // iterate over words


    for (int i = 0; i < hmm.my_model.word_probs.size(); i++) {
      Map.Entry<String, Map<String, Double>> entry = (Map.Entry<String, Map<String, Double>>) keyValuePairs
              .next();
      Object key = entry.getKey();
      Map<String, Double> pos = (Map) hmm.my_model.word_probs.get(key); // map of possible pos-s of
                                                                        // the word
      Object[] pos_s = pos.entrySet().toArray(); // for iteration over possible pos_s


      for (int u = 0; u < pos_s.length; u++) {


        Map.Entry<String, Map<String, Double>> entry2 = (Map.Entry<String, Map<String, Double>>) pos_s[u];
        Object key2 = entry2.getKey(); // pos of a word
        if (POS.contains(key2)) {
          continue;


        } else {
          POS.add(key2);
        }
      }


    }
    Collections.sort(POS);
    System.out.println("Number of part-of-speech tags used: " + POS.size());
    System.out.println("These are:  " + POS);


    System.out.println("Testing German trigram tagger..");


    sent.add("Jerry");
    sent.add("liebt");
    sent.add("Wansley");
    sent.add(".");


    System.out.println(sent);


    hmm.N = 3;
   // hmm.END_OF_SENT_TAG = "$.";


    String[] out = new String[] { "NE", "VVFIN", "NE", "$." };
    gold_standard.addAll(Arrays.asList(out));
    tagger_output = Viterbi.process(hmm.N, sent, hmm.my_model.suffix_tree,
            hmm.my_model.suffix_tree_capitalized, hmm.my_model.transition_probs,
            hmm.my_model.word_probs, hmm.my_model.lambdas2, hmm.my_model.lambdas3,
            hmm.my_model.theta);
    System.out.println("expected: " + gold_standard);
    System.out.println("tagger output: " + tagger_output);
    assertEquals(gold_standard, tagger_output);
    System.out.println("Very Good!");
    System.out.println("==========================================================");
  }


  /**
   * Tests English trigram tagger
   * 
   */
  @SuppressWarnings("unchecked")
  public void testEnglishTagger() {


    System.out.println("Tesing English Model... ");
    List POS = new ArrayList();


    try {
      hmm.my_model = HMMTagger.get_model("resources/english/BrownModel.dat");
    } catch (Exception e) {
      System.out.println("Model which is supposed to be used for testing does not exist");
    }
    System.out.println(hmm.my_model.word_probs.size() + " distinct words in the model");


    Iterator<Entry<String, Map<String, Double>>> keyValuePairs = hmm.my_model.word_probs.entrySet()
            .iterator(); // iterate over words


    for (int i = 0; i < hmm.my_model.word_probs.size(); i++) {
      Map.Entry<String, Map<String, Double>> entry = (Map.Entry<String, Map<String, Double>>) keyValuePairs
              .next();
      Object key = entry.getKey();
      Map<String, Double> pos = (Map) hmm.my_model.word_probs.get(key); // map of possible pos-s of
                                                                        // the word
      Object[] pos_s = pos.entrySet().toArray(); // for iteration over possible pos_s


      for (int u = 0; u < pos_s.length; u++) {


        Map.Entry<String, Map<String, Double>> entry2 = (Map.Entry<String, Map<String, Double>>) pos_s[u];
        Object key2 = entry2.getKey(); // pos of a word
        if (POS.contains(key2)) {
          continue;


        } else {
          POS.add(key2);
        }
      }


    }
    Collections.sort(POS);
    System.out.println("Number of part-of-speech tags used: " + POS.size());
    System.out.println("These are:  " + POS);


    System.out.println("Testing English trigram tagger...");


    sent.add("Jerry");
    sent.add("loves");
    sent.add("Wansley");
    sent.add(".");


    System.out.println(sent);


    hmm.N = 3;
 //   hmm.END_OF_SENT_TAG = "$.";


    String[] out = new String[] { "np", "vbz", "np", "." };
    gold_standard.addAll(Arrays.asList(out));
    tagger_output = Viterbi.process(hmm.N, sent, hmm.my_model.suffix_tree,
            hmm.my_model.suffix_tree_capitalized, hmm.my_model.transition_probs,
            hmm.my_model.word_probs, hmm.my_model.lambdas2, hmm.my_model.lambdas3,
            hmm.my_model.theta);
    System.out.println("expected: " + gold_standard);
    System.out.println("tagger output: " + tagger_output);
    assertEquals(gold_standard, tagger_output);
    System.out.println("Very Good!");
  }


}
Source Code of org.apache.uima.examples.tagger.test.TaggerTest

Related Classes of org.apache.uima.examples.tagger.test.TaggerTest