Package opennlp.tools.formats

Source Code of opennlp.tools.formats.ConllXPOSSampleStreamTest

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package opennlp.tools.formats;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;

import java.io.IOException;
import java.nio.charset.Charset;

import opennlp.tools.postag.POSSample;
import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.ObjectStream;

import org.junit.Test;

public class ConllXPOSSampleStreamTest {

  @Test
  public void testParsingSample() throws IOException {

    InputStreamFactory in = new ResourceAsStreamFactory(ConllXPOSSampleStreamTest.class,
        "/opennlp/tools/formats/conllx.sample");

    ObjectStream<POSSample> sampleStream = new ConllXPOSSampleStream(in,Charset.forName("UTF-8"));

    POSSample a = sampleStream.read();

    String aSentence[] = a.getSentence();
    String aTags[] = a.getTags();

    assertEquals(22, aSentence.length);
    assertEquals(22, aTags.length);

    assertEquals("To", aSentence[0]);
    assertEquals("AC", aTags[0]);

    assertEquals("kendte", aSentence[1]);
    assertEquals("AN", aTags[1]);

    assertEquals("russiske", aSentence[2]);
    assertEquals("AN", aTags[2]);

    assertEquals("historikere", aSentence[3]);
    assertEquals("NC", aTags[3]);

    assertEquals("Andronik", aSentence[4]);
    assertEquals("NP", aTags[4]);

    assertEquals("Andronik", aSentence[5]);
    assertEquals("NP", aTags[5]);

    assertEquals("og", aSentence[6]);
    assertEquals("CC", aTags[6]);

    assertEquals("Igor", aSentence[7]);
    assertEquals("NP", aTags[7]);

    assertEquals("Klamkin", aSentence[8]);
    assertEquals("NP", aTags[8]);

    assertEquals("tror", aSentence[9]);
    assertEquals("VA", aTags[9]);

    assertEquals("ikke", aSentence[10]);
    assertEquals("RG", aTags[10]);

    assertEquals(",", aSentence[11]);
    assertEquals("XP", aTags[11]);

    assertEquals("at", aSentence[12]);
    assertEquals("CS", aTags[12]);

    assertEquals("Rusland", aSentence[13]);
    assertEquals("NP", aTags[13]);

    assertEquals("kan", aSentence[14]);
    assertEquals("VA", aTags[14]);

    assertEquals("udvikles", aSentence[15]);
    assertEquals("VA", aTags[15]);

    assertEquals("uden", aSentence[16]);
    assertEquals("SP", aTags[16]);

    assertEquals("en", aSentence[17]);
    assertEquals("PI", aTags[17]);

    assertEquals("\"", aSentence[18]);
    assertEquals("XP", aTags[18]);

    assertEquals("jernnæve", aSentence[19]);
    assertEquals("NC", aTags[19]);

    assertEquals("\"", aSentence[20]);
    assertEquals("XP", aTags[20]);

    assertEquals(".", aSentence[21]);
    assertEquals("XP", aTags[21]);

    POSSample b = sampleStream.read();

    String bSentence[] = b.getSentence();
    String bTags[] = b.getTags();

    assertEquals(12, bSentence.length);
    assertEquals(12, bTags.length);

    assertEquals("De", bSentence[0]);
    assertEquals("PP", bTags[0]);

    assertEquals("hævder", bSentence[1]);
    assertEquals("VA", bTags[1]);

    assertEquals(",", bSentence[2]);
    assertEquals("XP", bTags[2]);

    assertEquals("at", bSentence[3]);
    assertEquals("CS", bTags[3]);

    assertEquals("Ruslands", bSentence[4]);
    assertEquals("NP", bTags[4]);

    assertEquals("vej", bSentence[5]);
    assertEquals("NC", bTags[5]);

    assertEquals("til", bSentence[6]);
    assertEquals("SP", bTags[6]);

    assertEquals("demokrati", bSentence[7]);
    assertEquals("NC", bTags[7]);

    assertEquals("går", bSentence[8]);
    assertEquals("VA", bTags[8]);

    assertEquals("gennem", bSentence[9]);
    assertEquals("SP", bTags[9]);

    assertEquals("diktatur", bSentence[10]);
    assertEquals("NC", bTags[10]);

    assertEquals(".", bSentence[11]);
    assertEquals("XP", bTags[11]);

    assertNull(sampleStream.read());
  }
}
TOP

Related Classes of opennlp.tools.formats.ConllXPOSSampleStreamTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by ORACLE Inc. Contact coftware#gmail.com.