Package opennlp.tools.namefind

Source Code of opennlp.tools.namefind.DictionaryNameFinderEvaluatorTest

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package opennlp.tools.namefind;

import static org.junit.Assert.assertTrue;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;

import opennlp.tools.cmdline.namefind.NameEvaluationErrorListener;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
import opennlp.tools.util.StringList;
import opennlp.tools.util.eval.FMeasure;

import org.junit.Test;

/**
* Tests the evaluation of a {@link DictionaryNameFinder}.
*/
public class DictionaryNameFinderEvaluatorTest {

  @Test
  public void testEvaluator() throws IOException, URISyntaxException {
    DictionaryNameFinder nameFinder = new DictionaryNameFinder(
        createDictionary());
    TokenNameFinderEvaluator evaluator = new TokenNameFinderEvaluator(
        nameFinder, new NameEvaluationErrorListener());
    ObjectStream<NameSample> sample = createSample();

    evaluator.evaluate(sample);
    sample.close();
    FMeasure fmeasure = evaluator.getFMeasure();

    assertTrue(fmeasure.getFMeasure() == 1);
    assertTrue(fmeasure.getRecallScore() == 1);
  }

  /**
   * Creates a NameSample stream using an annotated corpus
   *
   * @return
   * @throws IOException
   * @throws URISyntaxException
   */
  private static ObjectStream<NameSample> createSample() throws IOException,
      URISyntaxException {
    FileInputStream sampleDataIn = new FileInputStream(new File(
        DictionaryNameFinderEvaluatorTest.class.getClassLoader()
            .getResource("opennlp/tools/namefind/AnnotatedSentences.txt")
            .toURI()));

    return new NameSampleDataStream(new PlainTextByLineStream(
        sampleDataIn.getChannel(), "ISO-8859-1"));
  }

  /**
   * Creates a dictionary with all names from the sample data.
   *
   * @return a dictionary
   * @throws IOException
   * @throws URISyntaxException
   */
  private static Dictionary createDictionary() throws IOException,
      URISyntaxException {
    ObjectStream<NameSample> sampleStream = createSample();
    NameSample sample = sampleStream.read();
    List<String[]> entries = new ArrayList<String[]>();
    while (sample != null) {
      Span[] names = sample.getNames();
      if (names != null && names.length > 0) {
        String[] toks = sample.getSentence();
        for (Span name : names) {
          String[] nameToks = new String[name.length()];
          System.arraycopy(toks, name.getStart(), nameToks, 0, name.length());
          entries.add(nameToks);
        }
      }
      sample = sampleStream.read();
    }
    sampleStream.close();
    Dictionary dictionary = new Dictionary(true);
    for (String[] entry : entries) {
      StringList dicEntry = new StringList(entry);
      dictionary.put(dicEntry);
    }
    return dictionary;
  }
}
TOP

Related Classes of opennlp.tools.namefind.DictionaryNameFinderEvaluatorTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.