Package com.ikanow.infinit.e.data_model.store.feature.geo

Examples of com.ikanow.infinit.e.data_model.store.feature.geo.GeoFeaturePojo


        query1.append("search_field", firsttry);
        DBCursor dbc1 = DbManager.getFeature().getGeo().find(query1);
        if ( dbc1.count() == 1 )
        {
          //only 1 match so we can use this
          GeoFeaturePojo gfp = GeoFeaturePojo.fromDb(dbc1.next(),GeoFeaturePojo.class);
          ent_feature.setGeotag(gfp.getGeoindex());
          //we dont know what kind of point this is so we have to guess
          if ( gfp.getCity() != null)       ent_feature.setOntology_type("city");
          else if ( gfp.getRegion() != null )    ent_feature.setOntology_type("countrysubsidiary");
          else if ( gfp.getCountry() != null )   ent_feature.setOntology_type("country");
          else                  ent_feature.setOntology_type("point");
          return; //we are done so return
        }
        else
        {
View Full Code Here


      Candidate candidate = pair.getValue();
     
// 2.1] Let's analyse the "sitting tenant"
     
      int nPrio = 130;
      GeoFeaturePojo currLeader = null;
      int nCase = 0; // (just for debugging, 0=st, 1=large city, 2=region, 3=other)
     
      if (otherRegions.contains(candidate.state)) { // Strong direct ref, winner!
        nPrio = 10; // winner!
      }//TESTED: "san antonio, texas/city" vs "texas"
      else if (otherCountriesOrRegionsReferenced.contains(candidate.state)) {
        // Indirect ref
        nPrio = 40; // good, but beatable...
      }//TESTED: "philadelphia (village), new york/city"
      else if (otherCountries.contains("united states")) { // Weak direct ref
        nPrio = 80; // better than nothing...       
      }//TESTED: "apache, oklahoma/city"
      else if (otherCountriesOrRegionsReferenced.contains("united states")) { // Weak indirect ref
        nPrio = 80; // better than nothing...       
      }//TESTED: "washington, d.c." have DC as stateorcounty, but US in countries list
     
      // Special case: we don't like "village":
      if ((80 != nPrio) && ent.getDisambiguatedName().contains("village") && !ent.getActual_name().contains("village"))
      {       
        nPrio = 80;       
      }//TESTED: "Downvoted: Philadelphia (village), New York from Philadelphia"
     
      // Debug
      if (_nDebugLevel >= 2) {
        System.out.println(pair.getKey() + " SittingTenantScore=" + nPrio);
      }
     
      // Alternatives
      if (nPrio > 10) {
       
        LinkedList<GeoFeaturePojo> geos = pair.getValue().candidates;
        for (GeoFeaturePojo geo: geos) {
         
          int nAltPrio = 140;
          int nAltCase = -1;
          String city = (null != geo.getCity()) ? geo.getCity().toLowerCase() : null;
          String region = (null != geo.getRegion()) ? geo.getRegion().toLowerCase() : null;
          String country = (null != geo.getCountry()) ? geo.getCountry().toLowerCase() : null;
         
// 2.2] CASE 1: I'm a city with pop > 1M (best score 15)
//           15] Large city with strong direct   
//           30] Large city with strong indirect
//           70] Large city with weak direct
//           72] Large city with weak indirect
//           75] Large city with no reference          
         
          if ((null != city) && (geo.getPopulation() >= 400000) && (nPrio > 15)) {
            nAltCase = 1;
           
            if ((null != region) && (otherRegions.contains(region))) {
              nAltPrio = 15; // strong direct
            }//TESTED: "dallas / Texas / United States = 15"
            else if ((null != region) && (otherCountriesOrRegionsReferenced.contains(region))) {
              nAltPrio = 30; // strong indirect
            }//TESTED: "sacramento / California / United State"
            else if ((null != country) && (otherCountries.contains(country))) {
              nAltPrio = 70; // weak direct
            }//TESTED: "berlin, germany", with "germany" directly mentioned
            else if ((null != country) && (otherCountriesOrRegionsReferenced.contains(country))) {
              nAltPrio = 72; // weak indirect
            }//TESTED: "los angeles / California / United States = 72"
            else {
              nAltPrio = 75; // just for being big!
            }//TESTED: "barcelona, spain"
          }

// 2.3] CASE 2: I'm a region (best score=20, can beat current score)
//           20] Region with direct
//           50] Region with indirect
//          120] Region with no reference, if there is only 1
         
          else if ((null == city) && (nPrio > 20)) {
            nAltCase = 2;
           
            if ((null != country) && (otherCountries.contains(country))) {
              nAltPrio = 20; // strong direct
            }//TESTED: (region) "Berlin, Germany" with "Germany" mentioned
            else if ((null != country) && (otherCountriesOrRegionsReferenced.contains(country))) {
              nAltPrio = 50; // strong indirect
            }//(haven't seen, but we'll live)
            else {
              nAltPrio = 120; // (just for being there)
            }//TESTED: "null / Portland / Jamaica = 120", also "Shanghai / China"
          }
         
// 2.4] CASE 3: I'm any foreign possibility (best score=60)
//           60] Another foreign possibility with strong direct
//           78] Another foreign possibility with strong indirect (>100K population - ie not insignificant)
//           90] Another foreign possibility with strong indirect
//          100] Another foreign possibility with weak direct
//          110] Another foreign possibility with weak indirect
         
          else if (nPrio > 60) {
            nAltCase = 3;
           
            if ((null != region) && (otherRegions.contains(region))) {
              nAltPrio = 60; // strong direct
             
              // Double check we're not falling into the trap below:
              if (!geo.getCountry_code().equals("US")) {
                Matcher m = this._statesRegex.matcher(geo.getRegion());
                if (m.matches()) { // non US state matching against (probably) US state, disregard)
                  nAltPrio = 140;
                }
              }//TESTED (same clause as below)
             
            }//TESTED: lol "philadelphia / Maryland / Liberia = 60" (before above extra clause)
             
            if (nAltPrio > 60) { // (may need to re-run test)
              if ((null != country) && (otherCountries.contains(country))) {
                if (geo.getPopulation() < 100000) {
                  nAltPrio = 90; // strong indirect
                } //TESTED: "washington / Villa Clara / Cuba"
                else {
                  nAltPrio = 78; // strong indirect, with boost!               
                } //TESTED: "geneva, Geneve, Switzerland", pop 180K
              }
              else if ((null != region) && (otherCountriesOrRegionsReferenced.contains(region))) {
                nAltPrio = 100; // weak direct
              }//TESTED: "lincoln / Lincolnshire / United Kingdom = 100"
              else if ((null != country) && (otherCountriesOrRegionsReferenced.contains(country))) {
                nAltPrio = 110; // weak indirect
              }//(haven't seen, but we'll live)           
            }
          }
          // Debug:
          if ((_nDebugLevel >= 2) && (nAltPrio < 140)) {
            System.out.println("----Alternative: " + geo.getCity() + " / " + geo.getRegion() + " / " + geo.getCountry() + " score=" + nAltPrio);
          }
         
          // Outcome of results:
         
          if (nAltPrio < nPrio) {
            currLeader = geo;
            nPrio = nAltPrio;
            nCase = nAltCase;
          }
        } // end loop over alternatives
       
        if (null != currLeader) { // Need to change
         
          if (1 == nCase) {
            this._nMovedToLargeCity++;
           
            //(Cities are lower case in georef DB for some reason)
             String city = WordUtils.capitalize(currLeader.getCity());
           
            if (currLeader.getCountry_code().equals("US")) { // Special case: is this just the original?
             
              String region = currLeader.getRegion();
              if (region.equals("District of Columbia")) { // Special special case
                region = "D.C.";
              }
              String sCandidate = city + ", " + region;
             
              if (!sCandidate.equals(ent.getDisambiguatedName())) {
                ent.setDisambiguatedName(sCandidate);             
                ent.setIndex(ent.getDisambiguatedName() + "/city");
                ent.setSemanticLinks(null);
                bChangedAnything = true;
              }//TESTED (lots, eg "Philadelphia (village), New York" -> "Philadelphia, PA"; Wash, Ill. -> Wash DC)
              else {
                this._nMovedToLargeCity--;
                _nStayedWithOriginal++;               
              }//TESTED ("Washington DC", "San Juan, Puerto Rico")
            }//TESTED (see above)
            else {
              ent.setDisambiguatedName(city + ", " + currLeader.getCountry());             
              ent.setIndex(ent.getDisambiguatedName() + "/city");
              ent.setSemanticLinks(null);
              bChangedAnything = true;
            }//TESTED: "london, california/city to London, United Kingdom"
          }
          else if (2 == nCase) {
            this._nMovedToRegion++;
            ent.setDisambiguatedName(currLeader.getRegion() + ", " + currLeader.getCountry());
            ent.setIndex(ent.getDisambiguatedName() + "/region");
            ent.setSemanticLinks(null);
            bChangedAnything = true;
           
          }//TESTED: "Moved madrid, new york/city to Madrid, Spain" (treats Madrid as region, like Berlin see above)
          else {
            //(Cities are lower case in georef DB for some reason)
             String city = WordUtils.capitalize(currLeader.getCity());
            
            this._nMovedToForeignCity++;
            ent.setDisambiguatedName(city + ", " + currLeader.getCountry());
            ent.setIndex(ent.getDisambiguatedName() + "/city");
            ent.setSemanticLinks(null);
            bChangedAnything = true;

          }//TESTED: "Moved geneva, new york/city to Geneva, Switzerland"
View Full Code Here

   * @param distance
   * @return
   */
  private SearchSuggestPojo buildLocation(BasicDBObject location, Double distance)
  {
    GeoFeaturePojo feature = GeoFeaturePojo.fromDb(location, GeoFeaturePojo.class);
    SearchSuggestPojo suggest = new SearchSuggestPojo();
    suggest.setOntology_type(feature.getOntology_type());
    suggest.setScore(distance);     
    suggest.setValue(buildLocation(feature));
    suggest.setGeotag(new GeoPojo(feature.getGeoindex().lat, feature.getGeoindex().lon));
    return suggest;
  }
View Full Code Here

      else
      {
        String city, region, country, countryCode = null;
       
        // Create a GeoReferencePojo from the DocSpecGeo object
        GeoFeaturePojo g = new GeoFeaturePojo();

        if (d.getCity() != null)
        {
          if (JavaScriptUtils.containsScript(d.getCity()))
          {
            city = (String)getValueFromScript(d.getCity(), null, null);
          }
          else
          {
            city = getFormattedTextFromField(d.getCity(), null);
          }

          g.setCity(city);
          g.setSearch_field(city);
        }

        if (d.getStateProvince() != null)
        {
          if (JavaScriptUtils.containsScript(d.getStateProvince()))
          {
            region = (String)getValueFromScript(d.getStateProvince(), null, null);
          }
          else
          {
            region = getFormattedTextFromField(d.getStateProvince(), null);
          }

          g.setRegion(region);
          if (g.getSearch_field() == null) g.setSearch_field(region);
        }

        if (d.getCountry() != null)
        {
          if (JavaScriptUtils.containsScript(d.getCountry()))
          {
            country = (String)getValueFromScript(d.getCountry(), null, null);
          }
          else
          {
            country = getFormattedTextFromField(d.getCountry(), null);
          }

          g.setCountry(country);
          if (g.getSearch_field() == null) g.setSearch_field(country);
        }

        if (d.getCountryCode() != null)
        {
          if (JavaScriptUtils.containsScript(d.getCountryCode()))
          {
            countryCode = (String)getValueFromScript(d.getCountryCode(), null, null);
          }
          else
          {
            countryCode = getFormattedTextFromField(d.getCountryCode(), null);
          }

          g.setCountry_code(countryCode);
          if (g.getSearch_field() == null) g.setSearch_field(countryCode);
        }

        // Send the GeoReferencePojo to enrichGeoInfo to attempt to get lat and lon values
        boolean bStrictMatch = (null == d.getStrictMatch()) || d.getStrictMatch();
        List<GeoFeaturePojo> gList = GeoReference.enrichGeoInfo(g, bStrictMatch, true, 1);
View Full Code Here

        else
        {
          String city, region, country, countryCode = null;
         
          // Create a GeoReferencePojo from the GeoSpec object
          GeoFeaturePojo gfp = new GeoFeaturePojo();

          if (gsp.getCity() != null)
          {
            if (JavaScriptUtils.containsScript(gsp.getCity()))
            {
              city = (String)getValueFromScript(gsp.getCity(), null, null);
            }
            else
            {
              city = getFormattedTextFromField(gsp.getCity(), null);
            }

            gfp.setCity(city);
            gfp.setSearch_field(city);
          }

          if (gsp.getStateProvince() != null)
          {
            if (JavaScriptUtils.containsScript(gsp.getStateProvince()))
            {
              region = (String)getValueFromScript(gsp.getStateProvince(), null, null);
            }
            else
            {
              region = getFormattedTextFromField(gsp.getStateProvince(), null);
            }

            gfp.setRegion(region);
            if (gfp.getSearch_field() == null) gfp.setSearch_field(region);
          }

          if (gsp.getCountry() != null)
          {
            if (JavaScriptUtils.containsScript(gsp.getCountry()))
            {
              country = (String)getValueFromScript(gsp.getCountry(), null, null);
            }
            else
            {
              country = getFormattedTextFromField(gsp.getCountry(), null);
            }

            gfp.setCountry(country);
            if (gfp.getSearch_field() == null) gfp.setSearch_field(country);
          }

          if (gsp.getCountryCode() != null)
          {
            if (JavaScriptUtils.containsScript(gsp.getCountryCode()))
            {
              countryCode = (String)getValueFromScript(gsp.getCountryCode(), null, null);
            }
            else
            {
              countryCode = getFormattedTextFromField(gsp.getCountryCode(), null);
            }

            gfp.setCountry_code(countryCode);
            // (Don't set to search field for country code - it will be equal to country...)
          }

          // Send the GeoReferencePojo to enrichGeoInfo to attempt to get lat and lon values
          boolean bStrictMatch = (null == gsp.getStrictMatch()) || gsp.getStrictMatch();
          List<GeoFeaturePojo> gList = GeoReference.enrichGeoInfo(gfp, bStrictMatch, true, 1);
          GeoFeaturePojo firstGeo = gList.get(0);
          latValue = firstGeo.getGeoindex().lat.toString();
          lonValue = firstGeo.getGeoindex().lon.toString();
          gsp.setOntology_type(firstGeo.getOntology_type());
         
          // Set lat and long in DocGeo if possible
          dLat = Double.parseDouble(latValue);
          dLon = Double.parseDouble(lonValue);
        }
View Full Code Here

TOP

Related Classes of com.ikanow.infinit.e.data_model.store.feature.geo.GeoFeaturePojo

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.