Package opennlp.tools.chunker

Source Code of opennlp.tools.chunker.ChunkSampleTest

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package opennlp.tools.chunker;

import static org.junit.Assert.*;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.util.Arrays;

import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;

import org.junit.Test;

public class ChunkSampleTest {

  @Test(expected=IllegalArgumentException.class)
  public void testParameterValidation() {
    new ChunkSample(new String[]{""}, new String[]{""},
        new String[]{"test", "one element to much"});
  }
 
  private static String[] createSentence() {
    return new String[] {
        "Forecasts",
        "for",
        "the",
        "trade",
        "figures",
        "range",
        "widely",
        ",",
        "Forecasts",
        "for",
        "the",
        "trade",
        "figures",
        "range",
        "widely",
        "."
    };
  }
 
  private static String[] createTags() {
   
    return new String[]{
        "NNS",
        "IN",
        "DT",
        "NN",
        "NNS",
        "VBP",
        "RB",
        ",",
        "NNS",
        "IN",
        "DT",
        "NN",
        "NNS",
        "VBP",
        "RB",
        "."
    };
  }
 
  private static String[] createChunks() {
    return new String[]{
        "B-NP",
        "B-PP",
        "B-NP",
        "I-NP",
        "I-NP",
        "B-VP",
        "B-ADVP",
        "O",
        "B-NP",
        "B-PP",
        "B-NP",
        "I-NP",
        "I-NP",
        "B-VP",
        "B-ADVP",
        "O"
    };
  }
 
  @Test
  public void testRetrievingContent() {
    ChunkSample sample = new ChunkSample(createSentence(), createTags(), createChunks());
   
    assertArrayEquals(createSentence(), sample.getSentence());
    assertArrayEquals(createTags(), sample.getTags());
    assertArrayEquals(createChunks(), sample.getPreds());
  }
 
  @Test
  public void testToString() throws IOException {
   
    ChunkSample sample = new ChunkSample(createSentence(), createTags(), createChunks());
    String[] sentence = createSentence();
    String[] tags = createTags();
    String[] chunks = createChunks();
   
    StringReader sr = new StringReader(sample.toString());
    BufferedReader reader = new BufferedReader(sr);
    for (int i = 0; i < sentence.length; i++) {
      String line = reader.readLine();
      String[] parts = line.split("\\s+");
      assertEquals(3, parts.length);
      assertEquals(sentence[i], parts[0]);
      assertEquals(tags[i], parts[1]);
      assertEquals(chunks[i], parts[2]);
    }
  }
 
  @Test
  public void testNicePrint() {
   
    ChunkSample sample = new ChunkSample(createSentence(), createTags(), createChunks());
   
    assertEquals(" [NP Forecasts_NNS ] [PP for_IN ] [NP the_DT trade_NN figures_NNS ] " +
        "[VP range_VBP ] [ADVP widely_RB ] ,_, [NP Forecasts_NNS ] [PP for_IN ] [NP the_DT trade_NN figures_NNS ] " +
            "[VP range_VBP ] [ADVP widely_RB ] ._.", sample.nicePrint());
  }
 
  @Test
  public void testAsSpan() {
  ChunkSample sample = new ChunkSample(createSentence(), createTags(),
      createChunks());
  Span[] spans = sample.getPhrasesAsSpanList();

  assertEquals(10, spans.length);
  assertEquals(new Span(0, 1, "NP"), spans[0]);
  assertEquals(new Span(1, 2, "PP"), spans[1]);
  assertEquals(new Span(2, 5, "NP"), spans[2]);
  assertEquals(new Span(5, 6, "VP"), spans[3]);
  assertEquals(new Span(6, 7, "ADVP"), spans[4]);
  assertEquals(new Span(8, 9, "NP"), spans[5]);
    assertEquals(new Span(9, 10, "PP"), spans[6]);
    assertEquals(new Span(10, 13, "NP"), spans[7]);
    assertEquals(new Span(13, 14, "VP"), spans[8]);
    assertEquals(new Span(14, 15, "ADVP"), spans[9]);
  }
 
  @Test
  public void testPhraseAsSpan() {
    Span[] spans = ChunkSample.phrasesAsSpanList(createSentence(),
        createTags(), createChunks());

      assertEquals(10, spans.length);
      assertEquals(new Span(0, 1, "NP"), spans[0]);
      assertEquals(new Span(1, 2, "PP"), spans[1]);
      assertEquals(new Span(2, 5, "NP"), spans[2]);
      assertEquals(new Span(5, 6, "VP"), spans[3]);
      assertEquals(new Span(6, 7, "ADVP"), spans[4]);
      assertEquals(new Span(8, 9, "NP"), spans[5]);
      assertEquals(new Span(9, 10, "PP"), spans[6]);
      assertEquals(new Span(10, 13, "NP"), spans[7]);
      assertEquals(new Span(13, 14, "VP"), spans[8]);
      assertEquals(new Span(14, 15, "ADVP"), spans[9]);
  }

  @Test
  public void testRegions() throws IOException {
  InputStream in = getClass().getClassLoader()
      .getResourceAsStream("opennlp/tools/chunker/output.txt");

  String encoding = "UTF-8";

  DummyChunkSampleStream predictedSample = new DummyChunkSampleStream(
      new PlainTextByLineStream(new InputStreamReader(in,
          encoding)), false);

  ChunkSample cs1 = predictedSample.read();
  String[] g1 = Span.spansToStrings(cs1.getPhrasesAsSpanList(), cs1.getSentence());
  assertEquals(15, g1.length);
 
  ChunkSample cs2 = predictedSample.read();
  String[] g2 = Span.spansToStrings(cs2.getPhrasesAsSpanList(), cs2.getSentence());
  assertEquals(10, g2.length);
 
  ChunkSample cs3 = predictedSample.read();
  String[] g3 = Span.spansToStrings(cs3.getPhrasesAsSpanList(), cs3.getSentence());
  assertEquals(7, g3.length);
  assertEquals("United", g3[0]);
  assertEquals("'s directors", g3[1]);
  assertEquals("voted", g3[2]);
  assertEquals("themselves", g3[3]);
  assertEquals("their spouses", g3[4]);
  assertEquals("lifetime access", g3[5]);
  assertEquals("to", g3[6]);
  }
 

  // following are some tests to check the argument validation. Since all uses
  // the same validateArguments method, we do a deeper test only once

  @Test(expected = IllegalArgumentException.class)
  public void testInvalidPhraseAsSpan1() {
    ChunkSample.phrasesAsSpanList(new String[2], new String[1], new String[1]);
  }

  @Test(expected = IllegalArgumentException.class)
  public void testInvalidPhraseAsSpan2() {
    ChunkSample.phrasesAsSpanList(new String[1], new String[2], new String[1]);
  }

  @Test(expected = IllegalArgumentException.class)
  public void testInvalidPhraseAsSpan3() {
    ChunkSample.phrasesAsSpanList(new String[1], new String[1], new String[2]);
  }

  @Test(expected = IllegalArgumentException.class)
  public void testInvalidChunkSampleArray() {
    new ChunkSample(new String[1], new String[1], new String[2]);
  }

  @Test(expected = IllegalArgumentException.class)
  public void testInvalidChunkSampleList() {
    new ChunkSample(Arrays.asList(new String[1]), Arrays.asList(new String[1]),
        Arrays.asList(new String[2]));
  }
 
  @Test
  public void testEquals() {
    assertFalse(createGoldSample() == createGoldSample());
    assertTrue(createGoldSample().equals(createGoldSample()));
    assertFalse(createPredSample().equals(createGoldSample()));
    assertFalse(createPredSample().equals(new Object()));
  }
 
  public static ChunkSample createGoldSample() {
      return new ChunkSample(createSentence(), createTags(), createChunks());
  }
 
  public static ChunkSample createPredSample() {
      String[] chunks = createChunks();
      chunks[5] = "B-NP";
      return new ChunkSample(createSentence(), createTags(), chunks);
  }
 
}
TOP

Related Classes of opennlp.tools.chunker.ChunkSampleTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.