Package org.mediameter.cliff.places.disambiguation

Source Code of org.mediameter.cliff.places.disambiguation.GenericPass

package org.mediameter.cliff.places.disambiguation;

import java.util.ArrayList;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.bericotech.clavin.gazetteer.FeatureClass;
import com.bericotech.clavin.gazetteer.GeoName;
import com.bericotech.clavin.resolver.ResolvedLocation;

/**
* Wrapper around the concept that we can disambiguate ResolvedLocations in passes, building
* on the confidence in disambiguation results from preceeding passes.
* @author rahulb
*/
public abstract class GenericPass {

    private static final Logger logger = LoggerFactory.getLogger(GenericPass.class);

    //private static final double EXACT_MATCH_CONFIDENCE = 1.0;

    private int triggerCount = 0;
   
    public void execute(List<List<ResolvedLocation>> possibilitiesToDo,
            List<ResolvedLocation> bestCandidates) {
        if(possibilitiesToDo.size()==0){    // bail if there is nothing to disambiguate
            return;
        }
        List<List<ResolvedLocation>> possibilitiesToRemove = disambiguate(
                possibilitiesToDo, bestCandidates);
        for(ResolvedLocation pickedCandidate: bestCandidates){
            logSelectedCandidate(pickedCandidate);
            logResolvedLocationInfo(pickedCandidate);
        }
        triggerCount+= possibilitiesToRemove.size();
        for (List<ResolvedLocation> toRemove : possibilitiesToRemove) {
            possibilitiesToDo.remove(toRemove);
        }
        logger.debug("Still have " + possibilitiesToDo.size() + " lists to do");
    }

    abstract public String getDescription();
   
    abstract protected List<List<ResolvedLocation>> disambiguate(
            List<List<ResolvedLocation>> possibilitiesToDo,
            List<ResolvedLocation> bestCandidates);

    /**
     * This version of CLAVIN doesn't appear to fill in the confidence correctly
     * - it says 1.0 for everything. So we need a workaround to see if something
     * is an exact match.
     *
     * @param candidate
     * @return
     */
    static boolean isExactMatch(ResolvedLocation candidate) {
      //logger.debug(candidate.getGeoname().name + " EQUALS " + candidate.location.text + " ? " + candidate.getGeoname().name.equals(candidate.location.text));
        return candidate.getGeoname().getName().equalsIgnoreCase(candidate.getLocation().getText());
        // return candidate.confidence==EXACT_MATCH_CONFIDENCE;
    }
   
    protected static List<ResolvedLocation> getExactMatches(List<ResolvedLocation> candidates){
        ArrayList<ResolvedLocation> exactMatches = new ArrayList<ResolvedLocation>();
        for( ResolvedLocation item: candidates){
            if(GenericPass.isExactMatch(item)){
                exactMatches.add(item);
            }
        }
        return exactMatches;
    }

    protected static boolean inSameSuperPlace(ResolvedLocation candidate, List<ResolvedLocation> list){
        for( ResolvedLocation item: list){
            if(candidate.getGeoname().getAdmin1Code().equals(item.getGeoname().getAdmin1Code())){
                return true;
            }
        }
        return false;
    }
    protected static boolean isCity(ResolvedLocation candidate){
      return candidate.getGeoname().getPopulation()>0 && candidate.getGeoname().getFeatureClass()==FeatureClass.P;
   
    }
    protected static boolean isAdminRegion(ResolvedLocation candidate){
      return candidate.getGeoname().getPopulation()>0 && candidate.getGeoname().getFeatureClass()==FeatureClass.A;
    }
    protected ResolvedLocation findFirstCityCandidate(List<ResolvedLocation> candidates, boolean exactMatchRequired){
      for(ResolvedLocation candidate: candidates) {
            if(isCity(candidate)){
              if (exactMatchRequired && isExactMatch(candidate)){
                return candidate;
              } else if (!exactMatchRequired){
                return candidate;
              }
            }
        }
      return null;  
    }
    protected ResolvedLocation findFirstAdminCandidate(List<ResolvedLocation> candidates, boolean exactMatchRequired){
      for(ResolvedLocation candidate: candidates) {
            if(isAdminRegion(candidate)){
              if (exactMatchRequired && isExactMatch(candidate)){
                return candidate;
              } else if (!exactMatchRequired){
                return candidate;
              }
            }
        }
      return null;  
    }
    /* Logic is now to compare the City place with the Admin/State place.
     * If City has larger population then choose it. If the City and State are in the same country,
     * then choose the city (this will favor Paris the city over Paris the district in France).
     * If the City has lower population and is not in same country then choose the state.
     */
    protected boolean chooseCityOverAdmin(ResolvedLocation cityCandidate, ResolvedLocation adminCandidate){
      if (cityCandidate == null){
        return false;
      } else if (adminCandidate == null){
        return true;
      } else {
        return (cityCandidate.getGeoname().getPopulation() > adminCandidate.getGeoname().getPopulation()) ||
          (cityCandidate.getGeoname().getPrimaryCountryCode() == adminCandidate.getGeoname().getPrimaryCountryCode());
      }
    }
   
 
    protected boolean inSameCountry(ResolvedLocation candidate, List<ResolvedLocation> list){
     
        for( ResolvedLocation item: list){
            if(candidate.getGeoname().getPrimaryCountryCode().equals(item.getGeoname().getPrimaryCountryCode())){
                return true;
            }
        }
        return false;
    }

    public static void logSelectedCandidate(ResolvedLocation candidate){
        logger.debug("  PICKED: "+candidate.getLocation().getText()+"@"+candidate.getLocation().getPosition());
    }
   
    public static void logResolvedLocationInfo(ResolvedLocation resolvedLocation){
        GeoName candidatePlace = resolvedLocation.getGeoname();
        logger.debug("    "+candidatePlace.getGeonameID()+" "+candidatePlace.getName()+
                ", "+ candidatePlace.getAdmin1Code()+
                ", " + candidatePlace.getPrimaryCountryCode()
                + " / "+resolvedLocation.getConfidence()
                +" / "+candidatePlace.getPopulation() + " / " + candidatePlace.getFeatureClass()
                + " ( isExactMatch="+isExactMatch(resolvedLocation)+" )");
    }

    /**
     * How many times has this pass triggered a disambiguation 
     * @return
     */
    public int getTriggerCount(){
        return triggerCount;
    }
   
}
TOP

Related Classes of org.mediameter.cliff.places.disambiguation.GenericPass

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.