Package com.bericotech.clavin.resolver

Source Code of com.bericotech.clavin.resolver.ClavinLocationResolverHeuristicsTest

package com.bericotech.clavin.resolver;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import com.bericotech.clavin.ClavinException;
import com.bericotech.clavin.extractor.LocationOccurrence;
import com.bericotech.clavin.gazetteer.query.LuceneGazetteer;
import java.io.File;
import java.util.List;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/*#####################################################################
*
* CLAVIN (Cartographic Location And Vicinity INdexer)
* ---------------------------------------------------
*
* Copyright (C) 2012-2013 Berico Technologies
* http://clavin.bericotechnologies.com
*
* ====================================================================
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*
* ====================================================================
*
* ClavinLocationResolverHeuristicsTest.java
*
*###################################################################*/

/**
* Tests the mapping of location names into
* {@link ResolvedLocation} objects as performed by
* {@link ClavinLocationResolver#resolveLocations(List, boolean)}.
*/
public class ClavinLocationResolverHeuristicsTest {
    public final static Logger logger = LoggerFactory.getLogger(ClavinLocationResolverHeuristicsTest.class);

    // expected geonameID numbers for given location names
    private static final int BOSTON_MA = 4930956;
    private static final int HAVERHILL_MA = 4939085;
    private static final int WORCESTER_MA = 4956184;
    private static final int SPRINGFIELD_MA = 4951788;
    private static final int CHICAGO_IL = 4887398;
    private static final int ROCKFORD_IL = 4907959;
    private static final int SPRINGFIELD_IL = 4250542;
    private static final int DECATUR_IL = 4236895;
    private static final int KANSAS_CITY_MO = 4393217;
    private static final int SPRINGFIELD_MO = 4409896;
    private static final int ST_LOUIS_MO = 4407066;
    private static final int INDEPENDENCE_MO = 4391812;
    private static final int LONDON_UK = 2643743;
    private static final int MANCHESTER_UK = 2643123;
    private static final int HAVERHILL_UK = 2647310;
    private static final int TORONTO_ON = 6167865;
    private static final int OTTAWA_ON = 6094817;
    private static final int HAMILTON_ON = 5969782;
    private static final int KITCHENER_ON = 5992996;
    private static final int LONDON_ON = 6058560;
    private static final int CAIRO_EG = 360630;
    private static final int BENGHAZI_LY = 88319;
    private static final int VIRGINIA_US = 6254928;
    private static final int WASHINGTON_DC = 4140963;
    private static final int MARYLAND_US = 4361885;
    private static final int SEATTLE_WA = 5809844;
    private static final int WASHINGTON_STATE_US = 5815135;
    private static final int TACOMA_WA = 5812944;

    private static final int NO_HEURISTICS_MAX_HIT_DEPTH = 1;
    private static final int NO_HEURISTICS_MAX_CONTEXT_WINDOW = 1;
    private static final int HEURISTICS_MAX_HIT_DEPTH = 5;
    private static final int HEURISTICS_MAX_CONTEXT_WINDOW = 5;

    private ClavinLocationResolver resolver;
    private List<ResolvedLocation> resolvedLocations;

    /**
     * Instantiate two {@link ClavinLocationResolver} objects, one without
     * context-based heuristic matching and other with it turned on.
     */
    @Before
    public void setUp() throws ClavinException {
        resolver = new ClavinLocationResolver(new LuceneGazetteer(new File("./IndexDirectory")));
    }

    private List<ResolvedLocation> resolveNoHeuristics(final List<LocationOccurrence> locs, final boolean fuzzy)
            throws ClavinException {
        return resolver.resolveLocations(locs, NO_HEURISTICS_MAX_HIT_DEPTH, NO_HEURISTICS_MAX_CONTEXT_WINDOW, fuzzy);
    }

    private List<ResolvedLocation> resolveWithHeuristics(final List<LocationOccurrence> locs, final boolean fuzzy)
            throws ClavinException {
        return resolver.resolveLocations(locs, HEURISTICS_MAX_HIT_DEPTH, HEURISTICS_MAX_CONTEXT_WINDOW, fuzzy);
    }

    /**
     * Ensure we select the correct {@link ResolvedLocation} objects
     * without using context-based heuristic matching.
     *
     * Without heuristics, {@link ClavinLocationResolver} will default to
     * mapping location name Strings to the matching
     * {@link ResolvedLocation} object with the greatest (sort boosted) population.
     */
    @Test
    public void testNoHeuristics() throws ClavinException {
        String[] locations = {"Haverhill", "Worcester", "Springfield", "Kansas City"};

        resolvedLocations = resolveNoHeuristics(ClavinLocationResolverTest.makeOccurrencesFromNames(locations), false);

        assertEquals("LocationResolver chose the wrong \"Haverhill\"", HAVERHILL_MA, resolvedLocations.get(0).getGeoname().getGeonameID());
        assertEquals("LocationResolver chose the wrong \"Worcester\"", WORCESTER_MA, resolvedLocations.get(1).getGeoname().getGeonameID());
        assertEquals("LocationResolver chose the wrong \"Springfield\"", SPRINGFIELD_MO, resolvedLocations.get(2).getGeoname().getGeonameID());
        assertEquals("LocationResolver chose the wrong \"Kansas City\"", KANSAS_CITY_MO, resolvedLocations.get(3).getGeoname().getGeonameID());
    }

    /**
     * Ensure we select the correct Springfield in a document about
     * Massachusetts using context-based heuristic matching.
     */
    @Test
    public void testHeuristicsMassachusetts() throws ClavinException {
        String[] locations = {"Boston", "Haverhill", "Worcester", "Springfield", "Leominister"};

        resolvedLocations = resolveWithHeuristics(ClavinLocationResolverTest.makeOccurrencesFromNames(locations), true);

        assertEquals("LocationResolver chose the wrong \"Boston\"", BOSTON_MA, resolvedLocations.get(0).getGeoname().getGeonameID());
        assertEquals("LocationResolver chose the wrong \"Haverhill\"", HAVERHILL_MA, resolvedLocations.get(1).getGeoname().getGeonameID());
        assertEquals("LocationResolver chose the wrong \"Worcester\"", WORCESTER_MA, resolvedLocations.get(2).getGeoname().getGeonameID());
        assertEquals("LocationResolver chose the wrong \"Springfield\"", SPRINGFIELD_MA, resolvedLocations.get(3).getGeoname().getGeonameID());
    }

    /**
     * Ensure we select the correct Springfield in a document about
     * Illinois using context-based heuristic matching.
     */
    @Test
    public void testHeuristicsIllinois() throws ClavinException {
        String[] locations = {"Chicago", "Rockford", "Springfield", "Decatur"};

        resolvedLocations = resolveWithHeuristics(ClavinLocationResolverTest.makeOccurrencesFromNames(locations), true);

        assertEquals("LocationResolver chose the wrong \"Chicago\"", CHICAGO_IL, resolvedLocations.get(0).getGeoname().getGeonameID());
        assertEquals("LocationResolver chose the wrong \"Rockford\"", ROCKFORD_IL, resolvedLocations.get(1).getGeoname().getGeonameID());
        assertEquals("LocationResolver chose the wrong \"Springfield\"", SPRINGFIELD_IL, resolvedLocations.get(2).getGeoname().getGeonameID());
        assertEquals("LocationResolver chose the wrong \"Decatur\"", DECATUR_IL, resolvedLocations.get(3).getGeoname().getGeonameID());
    }

    /**
     * Ensure we select the correct Springfield in a document about
     * Missouri using context-based heuristic matching.
     */
    @Test
    public void testHeuristicsMissouri() throws ClavinException {
        String[] locations = {"Kansas City", "Springfield", "St. Louis", "Independence"};

        resolvedLocations = resolveWithHeuristics(ClavinLocationResolverTest.makeOccurrencesFromNames(locations), true);

        assertEquals("LocationResolver chose the wrong \"Kansas City\"", KANSAS_CITY_MO, resolvedLocations.get(0).getGeoname().getGeonameID());
        assertEquals("LocationResolver chose the wrong \"Springfield\"", SPRINGFIELD_MO, resolvedLocations.get(1).getGeoname().getGeonameID());
        assertEquals("LocationResolver chose the wrong \"St. Louis\"", ST_LOUIS_MO, resolvedLocations.get(2).getGeoname().getGeonameID());
        assertEquals("LocationResolver chose the wrong \"Independence\"", INDEPENDENCE_MO, resolvedLocations.get(3).getGeoname().getGeonameID());
    }

    /**
     * Ensure we select the correct Haverhill in a document about
     * England using context-based heuristic matching.
     */
    @Test
    public void testHeuristicsEngland() throws ClavinException {
        String[] locations = {"London", "Manchester", "Haverhill"};

        resolvedLocations = resolveWithHeuristics(ClavinLocationResolverTest.makeOccurrencesFromNames(locations), true);

        assertEquals("LocationResolver chose the wrong \"London\"", LONDON_UK, resolvedLocations.get(0).getGeoname().getGeonameID());
        assertEquals("LocationResolver chose the wrong \"Manchester\"", MANCHESTER_UK, resolvedLocations.get(1).getGeoname().getGeonameID());
        assertEquals("LocationResolver chose the wrong \"Haverhill\"", HAVERHILL_UK, resolvedLocations.get(2).getGeoname().getGeonameID());
    }

    /**
     * Ensure we select the correct London in a document about
     * Ontario using context-based heuristic matching.
     */
    @Test
    public void testHeuristicsOntario() throws ClavinException {
        String[] locations = {"Toronto", "Ottawa", "Hamilton", "Kitchener", "London"};

        resolvedLocations = resolveWithHeuristics(ClavinLocationResolverTest.makeOccurrencesFromNames(locations), true);

        assertEquals("LocationResolver chose the wrong \"Toronto\"", TORONTO_ON, resolvedLocations.get(0).getGeoname().getGeonameID());
        assertEquals("LocationResolver chose the wrong \"Ottawa\"", OTTAWA_ON, resolvedLocations.get(1).getGeoname().getGeonameID());
        assertEquals("LocationResolver chose the wrong \"Hamilton\"", HAMILTON_ON, resolvedLocations.get(2).getGeoname().getGeonameID());
        assertEquals("LocationResolver chose the wrong \"Kitchener\"", KITCHENER_ON, resolvedLocations.get(3).getGeoname().getGeonameID());
        assertEquals("LocationResolver chose the wrong \"London\"", LONDON_ON, resolvedLocations.get(4).getGeoname().getGeonameID());
    }

    /**
     * Tests some border cases involving the resolver.
     */
    @Test
    public void testBorderCases() throws ClavinException {
        // ensure we get no matches for this crazy String
        String[] locations = {"jhadghaoidhg"};

        resolvedLocations = resolveWithHeuristics(ClavinLocationResolverTest.makeOccurrencesFromNames(locations), false);
        assertTrue("Heuristic LocationResolver fuzzy off, no match", resolvedLocations.isEmpty());

        resolvedLocations = resolveWithHeuristics(ClavinLocationResolverTest.makeOccurrencesFromNames(locations), true);
        assertTrue("Heuristic LocationResolver fuzzy on, no match", resolvedLocations.isEmpty());
    }

    /**
     * Checks fix of bug where admin1 codes from different countries
     * were treated as equal.
     */
    @Test
    public void testHeuristicsNorthAfrica() throws ClavinException {
        String[] locations = {"Cairo", "Benghazi"};

        resolvedLocations = resolveWithHeuristics(ClavinLocationResolverTest.makeOccurrencesFromNames(locations), true);

        assertEquals("LocationResolver chose the wrong \"Cairo\"", CAIRO_EG, resolvedLocations.get(0).getGeoname().getGeonameID());
        assertEquals("LocationResolver chose the wrong \"Benghazi\"", BENGHAZI_LY, resolvedLocations.get(1).getGeoname().getGeonameID());
    }

    /**
     * Ensure we select the correct Washington in a document about
     * Washington, DC using context-based heuristic matching.
     */
    @Test
    public void testHeuristicsDC() throws ClavinException {
        String[] locations = {"Virginia", "Washington", "Maryland"};

        resolvedLocations = resolveWithHeuristics(ClavinLocationResolverTest.makeOccurrencesFromNames(locations), true);

        assertEquals("LocationResolver chose the wrong \"Virginia\"", VIRGINIA_US, resolvedLocations.get(0).getGeoname().getGeonameID());
        assertEquals("LocationResolver chose the wrong \"Washington\"", WASHINGTON_DC, resolvedLocations.get(1).getGeoname().getGeonameID());
        assertEquals("LocationResolver chose the wrong \"Maryland\"", MARYLAND_US, resolvedLocations.get(2).getGeoname().getGeonameID());
    }

    /**
     * Ensure we select the correct Washington in a document about
     * Washington State using context-based heuristic matching.
     */
    @Test
    public void testHeuristicsWA() throws ClavinException {
        String[] locations = {"Seattle", "Washington", "Tacoma"};

        resolvedLocations = resolveWithHeuristics(ClavinLocationResolverTest.makeOccurrencesFromNames(locations), true);

        assertEquals("LocationResolver chose the wrong \"Seattle\"", SEATTLE_WA, resolvedLocations.get(0).getGeoname().getGeonameID());
        assertEquals("LocationResolver chose the wrong \"Washington\"", WASHINGTON_STATE_US, resolvedLocations.get(1).getGeoname().getGeonameID());
        assertEquals("LocationResolver chose the wrong \"Tacoma\"", TACOMA_WA, resolvedLocations.get(2).getGeoname().getGeonameID());
    }
}
TOP

Related Classes of com.bericotech.clavin.resolver.ClavinLocationResolverHeuristicsTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.