Package org.apache.lucene.search.spell

Examples of org.apache.lucene.search.spell.StringDistance


      }
    }

    int count = rb.req.getParams().getInt(SPELLCHECK_COUNT, 1);
    float min = 0.5f;
    StringDistance sd = null;
    int numSug = Math.max(count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT);
    SolrSpellChecker checker = getSpellChecker(rb.req.getParams());
    if (checker instanceof AbstractLuceneSpellChecker) {
      AbstractLuceneSpellChecker spellChecker = (AbstractLuceneSpellChecker) checker;
      min = spellChecker.getAccuracy();
      sd = spellChecker.getStringDistance();
    }
    if (sd == null)
      sd = new LevensteinDistance();

    Collection<Token> tokens = null;
    try {
      tokens = getTokens(origQuery, checker.getQueryAnalyzer());
    } catch (IOException e) {
      LOG.error("Could not get tokens (this should never happen)", e);
    }

    // original token -> corresponding Suggestion object (keep track of start,end)
    Map<String, SpellCheckResponse.Suggestion> origVsSuggestion = new HashMap<String, SpellCheckResponse.Suggestion>();
    // original token string -> summed up frequency
    Map<String, Integer> origVsFreq = new HashMap<String, Integer>();
    // original token string -> # of shards reporting it as misspelled
    Map<String, Integer> origVsShards = new HashMap<String, Integer>();
    // original token string -> set of alternatives
    // must preserve order because collation algorithm can only work in-order
    Map<String, HashSet<String>> origVsSuggested = new LinkedHashMap<String, HashSet<String>>();
    // alternative string -> corresponding SuggestWord object
    Map<String, SuggestWord> suggestedVsWord = new HashMap<String, SuggestWord>();
    Map<String, SpellCheckCollation> collations = new HashMap<String, SpellCheckCollation>();
   
    int totalNumberShardResponses = 0;
    for (ShardRequest sreq : rb.finished) {
      for (ShardResponse srsp : sreq.responses) {
        NamedList nl = (NamedList) srsp.getSolrResponse().getResponse().get("spellcheck");
        LOG.info(srsp.getShard() + " " + nl);
        if (nl != null) {
          totalNumberShardResponses++;
          SpellCheckResponse spellCheckResp = new SpellCheckResponse(nl);
          for (SpellCheckResponse.Suggestion suggestion : spellCheckResp.getSuggestions()) {
            origVsSuggestion.put(suggestion.getToken(), suggestion);
            HashSet<String> suggested = origVsSuggested.get(suggestion.getToken());
            if (suggested == null) {
              suggested = new HashSet<String>();
              origVsSuggested.put(suggestion.getToken(), suggested);
            }

            // sum up original frequency         
            int origFreq = 0;
            Integer o = origVsFreq.get(suggestion.getToken());
            if (o != null) origFreq += o;
            origFreq += suggestion.getOriginalFrequency();
            origVsFreq.put(suggestion.getToken(), origFreq);
           
            //# shards reporting
            Integer origShards = origVsShards.get(suggestion.getToken());
            if(origShards==null) {
              origVsShards.put(suggestion.getToken(), 1);
            } else {
              origVsShards.put(suggestion.getToken(), ++origShards);
            }           

            // find best suggestions
            for (int i = 0; i < suggestion.getNumFound(); i++) {
              String alternative = suggestion.getAlternatives().get(i);
              suggested.add(alternative);
              SuggestWord sug = suggestedVsWord.get(alternative);
              if (sug == null)  {
                sug = new SuggestWord();
                suggestedVsWord.put(alternative, sug);
              }
              sug.string = alternative;
              // alternative frequency is present only for extendedResults=true
              if (suggestion.getAlternativeFrequencies() != null && suggestion.getAlternativeFrequencies().size() > 0) {
                Integer freq = suggestion.getAlternativeFrequencies().get(i);
                if (freq != null) sug.freq += freq;
              }
            }
          }
          NamedList suggestions = (NamedList) nl.get("suggestions");
          if(suggestions != null) {
            List<Object> collationList = suggestions.getAll("collation");
            List<Object> collationRankList = suggestions.getAll("collationInternalRank");
            int i=0;
            if(collationList != null) {
              for(Object o : collationList)
              {
                if(o instanceof String)
                {
                  SpellCheckCollation coll = new SpellCheckCollation();
                  coll.setCollationQuery((String) o);
                  if(collationRankList!= null && collationRankList.size()>0)
                  {
                    coll.setInternalRank((Integer) collationRankList.get(i));
                    i++;
                  }
                  SpellCheckCollation priorColl = collations.get(coll.getCollationQuery());
                  if(priorColl != null)
                  {
                    coll.setInternalRank(Math.max(coll.getInternalRank(),priorColl.getInternalRank()));
                  }
                  collations.put(coll.getCollationQuery(), coll);
                } else
                {
                  NamedList expandedCollation = (NamedList) o;                 
                  SpellCheckCollation coll = new SpellCheckCollation();
                  coll.setCollationQuery((String) expandedCollation.get("collationQuery"));
                  coll.setHits((Integer) expandedCollation.get("hits"));
                  if(maxCollationTries>0)
                  {
                    coll.setInternalRank((Integer) expandedCollation.get("collationInternalRank"));
                  }
                  coll.setMisspellingsAndCorrections((NamedList) expandedCollation.get("misspellingsAndCorrections"));
                  SpellCheckCollation priorColl = collations.get(coll.getCollationQuery());
                  if(priorColl != null)
                  {
                    coll.setHits(coll.getHits() + priorColl.getHits());
                    coll.setInternalRank(Math.max(coll.getInternalRank(),priorColl.getInternalRank()));
                  }
                  collations.put(coll.getCollationQuery(), coll);
                }
              }
            }
          }
        }
      }
    }

    // all shard responses have been collected
    // create token and get top suggestions
    SpellingResult result = new SpellingResult(tokens); //todo: investigate, why does it need tokens beforehand?
    for (Map.Entry<String, HashSet<String>> entry : origVsSuggested.entrySet()) {
      String original = entry.getKey();
     
      //Only use this suggestion if all shards reported it as misspelled.
      Integer numShards = origVsShards.get(original);
      if(numShards<totalNumberShardResponses) {
        continue;
      }
     
      HashSet<String> suggested = entry.getValue();
      SuggestWordQueue sugQueue = new SuggestWordQueue(numSug);
      for (String suggestion : suggested) {
        SuggestWord sug = suggestedVsWord.get(suggestion);
        sug.score = sd.getDistance(original, sug.string);
        if (sug.score < min) continue;
        sugQueue.insertWithOverflow(sug);
        if (sugQueue.size() == numSug) {
          // if queue full, maintain the minScore score
          min = sugQueue.top().score;
View Full Code Here


    SolrIndexSearcher searcher = holder.get();
    try {
    checker.build(core, searcher);
    SpellChecker sc = checker.getSpellChecker();
    assertTrue("sc is null and it shouldn't be", sc != null);
    StringDistance sd = sc.getStringDistance();
    assertTrue("sd is null and it shouldn't be", sd != null);
    assertTrue("sd is not an instance of " + JaroWinklerDistance.class.getName(), sd instanceof JaroWinklerDistance);
    } finally {
      holder.decref();
    }
View Full Code Here

      }
    }

    int count = rb.req.getParams().getInt(SPELLCHECK_COUNT, 1);
    float min = 0.5f;
    StringDistance sd = null;
    int numSug = Math.max(count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT);
    SolrSpellChecker checker = getSpellChecker(rb.req.getParams());
    if (checker instanceof AbstractLuceneSpellChecker) {
      AbstractLuceneSpellChecker spellChecker = (AbstractLuceneSpellChecker) checker;
      min = spellChecker.getAccuracy();
      sd = spellChecker.getStringDistance();
    }
    if (sd == null)
      sd = new LevensteinDistance();

    Collection<Token> tokens = null;
    try {
      tokens = getTokens(origQuery, checker.getQueryAnalyzer());
    } catch (IOException e) {
      LOG.error("Could not get tokens (this should never happen)", e);
    }

    // original token -> corresponding Suggestion object (keep track of start,end)
    Map<String, SpellCheckResponse.Suggestion> origVsSuggestion = new HashMap<String, SpellCheckResponse.Suggestion>();
    // original token string -> summed up frequency
    Map<String, Integer> origVsFreq = new HashMap<String, Integer>();
    // original token string -> # of shards reporting it as misspelled
    Map<String, Integer> origVsShards = new HashMap<String, Integer>();
    // original token string -> set of alternatives
    // must preserve order because collation algorithm can only work in-order
    Map<String, HashSet<String>> origVsSuggested = new LinkedHashMap<String, HashSet<String>>();
    // alternative string -> corresponding SuggestWord object
    Map<String, SuggestWord> suggestedVsWord = new HashMap<String, SuggestWord>();
    Map<String, SpellCheckCollation> collations = new HashMap<String, SpellCheckCollation>();
   
    int totalNumberShardResponses = 0;
    for (ShardRequest sreq : rb.finished) {
      for (ShardResponse srsp : sreq.responses) {
        NamedList nl = (NamedList) srsp.getSolrResponse().getResponse().get("spellcheck");
        LOG.info(srsp.getShard() + " " + nl);
        if (nl != null) {
          totalNumberShardResponses++;
          SpellCheckResponse spellCheckResp = new SpellCheckResponse(nl);
          for (SpellCheckResponse.Suggestion suggestion : spellCheckResp.getSuggestions()) {
            origVsSuggestion.put(suggestion.getToken(), suggestion);
            HashSet<String> suggested = origVsSuggested.get(suggestion.getToken());
            if (suggested == null) {
              suggested = new HashSet<String>();
              origVsSuggested.put(suggestion.getToken(), suggested);
            }

            // sum up original frequency         
            int origFreq = 0;
            Integer o = origVsFreq.get(suggestion.getToken());
            if (o != null) origFreq += o;
            origFreq += suggestion.getOriginalFrequency();
            origVsFreq.put(suggestion.getToken(), origFreq);
           
            //# shards reporting
            Integer origShards = origVsShards.get(suggestion.getToken());
            if(origShards==null) {
              origVsShards.put(suggestion.getToken(), 1);
            } else {
              origVsShards.put(suggestion.getToken(), ++origShards);
            }           

            // find best suggestions
            for (int i = 0; i < suggestion.getNumFound(); i++) {
              String alternative = suggestion.getAlternatives().get(i);
              suggested.add(alternative);
              SuggestWord sug = suggestedVsWord.get(alternative);
              if (sug == null)  {
                sug = new SuggestWord();
                suggestedVsWord.put(alternative, sug);
              }
              sug.string = alternative;
              // alternative frequency is present only for extendedResults=true
              if (suggestion.getAlternativeFrequencies() != null && suggestion.getAlternativeFrequencies().size() > 0) {
                Integer freq = suggestion.getAlternativeFrequencies().get(i);
                if (freq != null) sug.freq += freq;
              }
            }
          }
          NamedList suggestions = (NamedList) nl.get("suggestions");
          if(suggestions != null) {
            List<Object> collationList = suggestions.getAll("collation");
            List<Object> collationRankList = suggestions.getAll("collationInternalRank");
            int i=0;
            if(collationList != null) {
              for(Object o : collationList)
              {
                if(o instanceof String)
                {
                  SpellCheckCollation coll = new SpellCheckCollation();
                  coll.setCollationQuery((String) o);
                  if(collationRankList!= null && collationRankList.size()>0)
                  {
                    coll.setInternalRank((Integer) collationRankList.get(i));
                    i++;
                  }
                  SpellCheckCollation priorColl = collations.get(coll.getCollationQuery());
                  if(priorColl != null)
                  {
                    coll.setInternalRank(Math.max(coll.getInternalRank(),priorColl.getInternalRank()));
                  }
                  collations.put(coll.getCollationQuery(), coll);
                } else
                {
                  NamedList expandedCollation = (NamedList) o;                 
                  SpellCheckCollation coll = new SpellCheckCollation();
                  coll.setCollationQuery((String) expandedCollation.get("collationQuery"));
                  coll.setHits((Integer) expandedCollation.get("hits"));
                  if(maxCollationTries>0)
                  {
                    coll.setInternalRank((Integer) expandedCollation.get("collationInternalRank"));
                  }
                  coll.setMisspellingsAndCorrections((NamedList) expandedCollation.get("misspellingsAndCorrections"));
                  SpellCheckCollation priorColl = collations.get(coll.getCollationQuery());
                  if(priorColl != null)
                  {
                    coll.setHits(coll.getHits() + priorColl.getHits());
                    coll.setInternalRank(Math.max(coll.getInternalRank(),priorColl.getInternalRank()));
                  }
                  collations.put(coll.getCollationQuery(), coll);
                }
              }
            }
          }
        }
      }
    }

    // all shard responses have been collected
    // create token and get top suggestions
    SpellingResult result = new SpellingResult(tokens); //todo: investigate, why does it need tokens beforehand?
    for (Map.Entry<String, HashSet<String>> entry : origVsSuggested.entrySet()) {
      String original = entry.getKey();
     
      //Only use this suggestion if all shards reported it as misspelled.
      Integer numShards = origVsShards.get(original);
      if(numShards<totalNumberShardResponses) {
        continue;
      }
     
      HashSet<String> suggested = entry.getValue();
      SuggestWordQueue sugQueue = new SuggestWordQueue(numSug);
      for (String suggestion : suggested) {
        SuggestWord sug = suggestedVsWord.get(suggestion);
        sug.score = sd.getDistance(original, sug.string);
        if (sug.score < min) continue;
        sugQueue.insertWithOverflow(sug);
        if (sugQueue.size() == numSug) {
          // if queue full, maintain the minScore score
          min = sugQueue.top().score;
View Full Code Here

      }
    }
    sourceLocation = (String) config.get(LOCATION);
    field = (String) config.get(FIELD);
    String strDistanceName = (String)config.get(STRING_DISTANCE);
    StringDistance sd = null;
    if (strDistanceName != null) {
      sd = (StringDistance) core.getResourceLoader().newInstance(strDistanceName);
      //TODO: Figure out how to configure options.  Where's Spring when you need it?  Or at least BeanUtils...
    } else {
      sd = new LevensteinDistance();
View Full Code Here

      }
    }
    sourceLocation = (String) config.get(LOCATION);
    field = (String) config.get(FIELD);
    String strDistanceName = (String)config.get(STRING_DISTANCE);
    StringDistance sd = null;
    if (strDistanceName != null) {
      sd = (StringDistance) core.getResourceLoader().newInstance(strDistanceName);
      //TODO: Figure out how to configure options.  Where's Spring when you need it?  Or at least BeanUtils...
    } else {
      sd = new LevensteinDistance();
View Full Code Here

      }
    }
    sourceLocation = (String) config.get(LOCATION);
    field = (String) config.get(FIELD);
    String strDistanceName = (String)config.get(STRING_DISTANCE);
    StringDistance sd = null;
    if (strDistanceName != null) {
      sd = (StringDistance) core.getResourceLoader().newInstance(strDistanceName);
      //TODO: Figure out how to configure options.  Where's Spring when you need it?  Or at least BeanUtils...
    } else {
      sd = new LevensteinDistance();
View Full Code Here

      }
    }
    sourceLocation = (String) config.get(LOCATION);
    field = (String) config.get(FIELD);
    String strDistanceName = (String)config.get(STRING_DISTANCE);
    StringDistance sd = null;
    if (strDistanceName != null) {
      sd = (StringDistance) core.getResourceLoader().newInstance(strDistanceName);
      //TODO: Figure out how to configure options.  Where's Spring when you need it?  Or at least BeanUtils...
    } else {
      sd = new LevensteinDistance();
View Full Code Here

    SolrIndexSearcher searcher = holder.get();
    try {
    checker.build(core, searcher);
    SpellChecker sc = checker.getSpellChecker();
    assertTrue("sc is null and it shouldn't be", sc != null);
    StringDistance sd = sc.getStringDistance();
    assertTrue("sd is null and it shouldn't be", sd != null);
    assertTrue("sd is not an instance of " + JaroWinklerDistance.class.getName(), sd instanceof JaroWinklerDistance);
    } finally {
      holder.decref();
    }
View Full Code Here

 
  /**
   * @param args
   */
  public static void main(String[] args) {
      StringDistance sd = new JaroWinklerDistance();
     
      displayDistance(sd, "henka", "henkan");
      displayDistance(sd, "al", "al");
      displayDistance(sd, "martha", "marhta");
      displayDistance(sd, "jones", "johnson");
View Full Code Here

TOP

Related Classes of org.apache.lucene.search.spell.StringDistance

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.