Package org.dbpedia.spotlight.spot

Source Code of org.dbpedia.spotlight.spot.OpenNLPSpottersTest

/*
* Copyright 2011 Pablo Mendes, Max Jakob
*
*  Licensed under the Apache License, Version 2.0 (the "License");
*  you may not use this file except in compliance with the License.
*  You may obtain a copy of the License at
*
*  http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing, software
*  distributed under the License is distributed on an "AS IS" BASIS,
*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*  See the License for the specific language governing permissions and
*  limitations under the License.
*
*  Check our project website for information on how to acknowledge the authors and how to contribute to the project: http://spotlight.dbpedia.org
*/

package org.dbpedia.spotlight.spot;

import org.dbpedia.spotlight.exceptions.ConfigurationException;
import org.dbpedia.spotlight.exceptions.SpottingException;
import org.dbpedia.spotlight.model.SpotlightConfiguration;
import org.dbpedia.spotlight.model.SurfaceForm;
import org.dbpedia.spotlight.model.SurfaceFormOccurrence;
import org.dbpedia.spotlight.model.Text;
import org.junit.Test;

import java.util.Arrays;
import java.util.List;

import static org.junit.Assert.assertTrue;

/**
* Sees if spotters run.
* Tests if offsets are correct.
* TODO Need to provide tough examples that test each of the spotters
*
* @author pablomendes
*/
public class OpenNLPSpottersTest {

/*
The financial crunch in the U.S. threatens to undermine a foreign policy described as “smart power” in Texas by President Obama and Secretary of State Hillary Rodham Clinton, one that emphasizes diplomacy and development as a complement to American military power. It also would begin to reverse the increase in foreign aid that President George W. Bush supported after the attacks of Sept. 11, 2001, as part of an effort to combat the roots of extremism and anti-American sentiment, especially in the most troubled countries.
*/
    Text t = new Text("The financial crunch in the U.S. threatens to undermine a foreign policy described as “smart power” in Texas by President Obama and Secretary of State Hillary Rodham Clinton, one that emphasizes diplomacy and development as a complement to American military power. It also would begin to reverse the increase in foreign aid that President George W. Bush supported after the attacks of Sept. 11, 2001, as part of an effort to combat the roots of extremism and anti-American sentiment, especially in the most troubled countries. ");
    SurfaceFormOccurrence financialCrunch = new SurfaceFormOccurrence(new SurfaceForm("financial crunch"), t, 4);
    SurfaceFormOccurrence smartPower = new SurfaceFormOccurrence(new SurfaceForm("smart power"), t, 87);
    SurfaceFormOccurrence quotedSmartPower = new SurfaceFormOccurrence(new SurfaceForm("“smart power”"), t, 86);
    SurfaceFormOccurrence us = new SurfaceFormOccurrence(new SurfaceForm("U.S."), t, 28);
    List<SurfaceFormOccurrence> occs = Arrays.asList(financialCrunch, smartPower, quotedSmartPower, us);

    SpotlightConfiguration config = null;



    public OpenNLPSpottersTest() throws ConfigurationException {
        config = new SpotlightConfiguration("conf/dev.properties");
    }


    private void print(List<SurfaceFormOccurrence> spots) {
        for (SurfaceFormOccurrence spot: spots) {
            System.err.println(String.format("%s at %s",spot.surfaceForm(), spot.textOffset()));
        }
    }

    private void testOffsets(List<SurfaceFormOccurrence> spots) {
        for (SurfaceFormOccurrence spot: spots) {
            assertTrue(testOffset(spot));
        }
    }

    private boolean testOffset(SurfaceFormOccurrence occ) {
        String name = occ.surfaceForm().name();
        int offset = occ.textOffset();
        int length = name.length();
        String text = occ.context().text();
        String extracted = text.substring(offset, offset+length);
        System.err.println(String.format("(%s) -> (%s)", name, extracted).replaceAll(" ","_")); //make spaces more visible
        return name.equals(extracted);
    }

    @Test
    public void assureOccsAreCorrectlyCreated() throws ConfigurationException, SpottingException {
        for (SurfaceFormOccurrence spot: occs) {
            assertTrue(testOffset(spot));
        }
    }

    @Test
    public void assureNESpotterRuns() throws ConfigurationException, SpottingException {
        Spotter ner = new NESpotter(config.getSpotterConfiguration().getOpenNLPModelDir() + "/"+config.getLanguage().toLowerCase()+"/", config.getI18nLanguageCode(), config.getSpotterConfiguration().getOpenNLPModelsURI());
        List<SurfaceFormOccurrence> spots = ner.extract(t);
        print(spots);
    }

    @Test
    public void correctNEOffsets() throws ConfigurationException, SpottingException {
        Spotter ner = new NESpotter(config.getSpotterConfiguration().getOpenNLPModelDir() + "/"+config.getLanguage().toLowerCase()+"/", config.getI18nLanguageCode(), config.getSpotterConfiguration().getOpenNLPModelsURI());
        List<SurfaceFormOccurrence> spots = ner.extract(t);
        testOffsets(spots);
    }

    @Test
    public void assureOpenNLPNGramSpotterRuns() throws ConfigurationException, SpottingException {
        OpenNLPNGramSpotter chunkSpotter = new OpenNLPNGramSpotter(config.getSpotterConfiguration().getOpenNLPModelDir() + "/"  + config.getLanguage().toLowerCase(), config.getI18nLanguageCode());
        List<SurfaceFormOccurrence> spots = chunkSpotter.extract(t);
        print(spots);
    }

    @Test
    public void correctChunkerOffsets() throws SpottingException, ConfigurationException {
        OpenNLPNGramSpotter chunkSpotter = new OpenNLPNGramSpotter(config.getSpotterConfiguration().getOpenNLPModelDir()+ "/"  + config.getLanguage().toLowerCase(), config.getI18nLanguageCode());
        List<SurfaceFormOccurrence> spots = chunkSpotter.extract(t);
        System.err.println(financialCrunch);
        testOffsets(spots);
        //assertTrue(spots.contains(financialCrunch));
        //assertTrue(spots.contains(new SurfaceFormOccurrence(new SurfaceForm("smart power"), t, 75)));
        //assertTrue(!spots.contains(new SurfaceFormOccurrence(new SurfaceForm("“smart power”"),t,75)));
    }
}
TOP

Related Classes of org.dbpedia.spotlight.spot.OpenNLPSpottersTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.