Package com.bericotech.clavin.extractor

Examples of com.bericotech.clavin.extractor.LocationOccurrence


    //this convenience method turns an array of location name strings into a list of occurrences with fake positions.
    //(useful for tests that don't care about position in the document)
    public static List<LocationOccurrence> makeOccurrencesFromNames(String[] locationNames) {
        List<LocationOccurrence> locations = new ArrayList<LocationOccurrence>(locationNames.length);
        for (int i = 0; i < locationNames.length; ++i) {
            locations.add(new LocationOccurrence(locationNames[i], i));
        }
        return locations;
    }
View Full Code Here


     * Tests some border cases involving the resolver.
     */
    @Test
    public void testBorderCases() throws ClavinException {
        // ensure we get no matches for this crazy String
        LocationOccurrence loc = new LocationOccurrence("jhadghaoidhg", 0);
        queryBuilder.location(loc);
        assertTrue("Gazetteer fuzzy off, no match", instance.getClosestLocations(queryBuilder.fuzzyMode(FuzzyMode.OFF).build()).isEmpty());
        assertTrue("Gazetteer fuzzy on, no match", instance.getClosestLocations(queryBuilder.fuzzyMode(FuzzyMode.NO_EXACT).build()).isEmpty());
    }
View Full Code Here

    /**
     * Ensure historical records are not matched by getClosestActiveLocations.
     */
    @Test
    public void testFindHistoricalLocations() throws ClavinException {
        LocationOccurrence sovietUnion = new LocationOccurrence("Soviet Union", 0);
        queryBuilder.location(sovietUnion).maxResults(10).fuzzyMode(FuzzyMode.NO_EXACT);
        List<ResolvedLocation> withHistorical = instance.getClosestLocations(queryBuilder.includeHistorical(true).build());
        List<ResolvedLocation> activeOnly = instance.getClosestLocations(queryBuilder.includeHistorical(false).build());

        // verify that historical Soviet Union is found when searching all locations
View Full Code Here

        // create corresponding Lucene Documents for gazetteer records
        GeoName geoname = GeoName.parseFromGeoNamesRecord(geonamesEntry);
        GeoName geoname2 = GeoName.parseFromGeoNamesRecord(geonamesEntry2);

        // a bogus LocationOccurrence object for testing
        LocationOccurrence locationA = new LocationOccurrence("A", 0);

        // two ResolvedLocation objects created from same Lucene Doc, etc.
        ResolvedLocation resolvedLocation = new ResolvedLocation(locationA, geoname, "Nowhere", false);
        ResolvedLocation resolvedLocationDupe = new ResolvedLocation(locationA, geoname, "Nowhere", false);
View Full Code Here

    //this convenience method turns an array of location name strings into a list of occurrences with fake positions.
    //(useful for tests that don't care about position in the document)
    public static List<LocationOccurrence> makeOccurrencesFromNames(String[] locationNames) {
        List<LocationOccurrence> locations = new ArrayList<LocationOccurrence>(locationNames.length);
        for(int i = 0; i < locationNames.length; ++i ) {
            locations.add(new LocationOccurrence(locationNames[i], i));
        }
        return locations;
    }
View Full Code Here

     * the given location name.
     * @param locName the name of the location to query for
     * @return this
     */
    public QueryBuilder location(final String locName) {
        location = new LocationOccurrence(locName, 0);
        return this;
    }
View Full Code Here

        // if there is no location to query, return no results
        if ("".equals(sanitizedLocationName)) {
            return Collections.EMPTY_LIST;
        }

        LocationOccurrence location = query.getOccurrence();
        int maxResults = query.getMaxResults() > 0 ? query.getMaxResults() : DEFAULT_MAX_RESULTS;
        Filter filter = buildFilter(query);
        List<ResolvedLocation> matches;
        try {
            // attempt to find an exact match for the query
            matches = executeQuery(location, sanitizedLocationName, filter, maxResults, false, query.isFilterDupes(), null);
            if (LOG.isDebugEnabled()) {
                for (ResolvedLocation loc : matches) {
                    LOG.debug("{}", loc);
                }
            }
            // check to see if we should run a fuzzy query based on the configured FuzzyMode
            if (query.getFuzzyMode().useFuzzyMatching(maxResults, matches.size())) {
                // provide any exact matches if we are running a fuzzy query so they can be considered for deduplication
                // and result count
                matches = executeQuery(location, sanitizedLocationName, filter, maxResults, true, query.isFilterDupes(), matches);
                if (LOG.isDebugEnabled()) {
                    for (ResolvedLocation loc : matches) {
                        LOG.debug("{}[fuzzy]", loc);
                    }
                }
            }
            if (matches.isEmpty()) {
                LOG.debug("No match found for: '{}'", location.getText());
            }
        } catch (ParseException pe) {
            throw new ClavinException(String.format("Error parsing query for: '%s'}", location.getText()), pe);
        } catch (IOException ioe) {
            throw new ClavinException(String.format("Error executing query for: '%s'}", location.getText()), ioe);
        }
        return matches;
    }
View Full Code Here

    private List<LocationOccurrence> getLocationOccurrencesFromTermMentions(final Iterable<Vertex> termMentions) {
        List<LocationOccurrence> locationOccurrences = new ArrayList<LocationOccurrence>();

        for (Vertex termMention : termMentions) {
            if (isLocation(termMention)) {
                locationOccurrences.add(new LocationOccurrence(LumifyProperties.TERM_MENTION_TITLE.getPropertyValue(termMention), (int) LumifyProperties.TERM_MENTION_START_OFFSET.getPropertyValue(termMention, 0)));
            }
        }
        return locationOccurrences;
    }
View Full Code Here

                    try {
                        for (String locationName: doc.getLocations()){
                            if(customSubstitutions.contains(locationName)){
                                locationName = customSubstitutions.getSubstitution(locationName);
                            }
                            locationOccurrences.add( new LocationOccurrence(locationName,0) );
                            rawResolvedLocations.addAll( ParseManager.extractAndResolve(locationName).getResolvedLocations() );
                        }
                        List<ResolvedLocation> resolvedLocations;
                        resolvedLocations = ParseManager.getResolver().resolveLocations(locationOccurrences,false);
                        resolvedLocations.addAll(rawResolvedLocations);
View Full Code Here

            logger.debug("Demonym substitution: "+entityName+" to "+fixedName);
        } else if(customSubstitutions.contains(entityName)) {
            fixedName = customSubstitutions.getSubstitution(entityName);
            logger.debug("Custom substitution: "+entityName+" to "+fixedName);
        }
        return new LocationOccurrence(fixedName, position);
    }
View Full Code Here

TOP

Related Classes of com.bericotech.clavin.extractor.LocationOccurrence

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.