Examples of org.apache.lucene.search.FuzzyTermsEnum$LevenshteinAutomataAttribute

org.apache.lucene.search.FuzzyTermsEnum
reuses compiled automata across different segments, because they are independent of the index @lucene.internal

  @Override
  protected TermsEnum getTermsEnum(final Terms terms, final AttributeSource atts) throws IOException {
    if (maxEdits == 0 || prefixLength >= term.text().length()) {  // can only match if it's exact
      return new SingleTermsEnum(terms.iterator(null), term.bytes());
    }
    return new FuzzyTermsEnum(terms, atts, this.getTerm(), maxEdits, prefixLength, transpositions);
  }

View Full Code Here

      atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
    Terms terms = MultiFields.getTerms(ir, term.field());
    if (terms == null) {
      return Collections.emptyList();
    }
    FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance-1), true);
    final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();
    
    BytesRef queryTerm = new BytesRef(term.text());
    BytesRef candidateTerm;
    ScoreTerm st = new ScoreTerm();
    BoostAttribute boostAtt =
      e.attributes().addAttribute(BoostAttribute.class);
    while ((candidateTerm = e.next()) != null) {
      final float boost = boostAtt.getBoost();
      // ignore uncompetitive hits
      if (stQueue.size() >= numSug && boost <= stQueue.peek().boost)
        continue;
      
      // ignore exact match of the same term
      if (queryTerm.bytesEquals(candidateTerm))
        continue;
      
      int df = e.docFreq();
      
      // check docFreq if required
      if (df <= docfreq)
        continue;
      
      final float score;
      final String termAsString;
      if (distance == INTERNAL_LEVENSHTEIN) {
        // delay creating strings until the end
        termAsString = null;
        // undo FuzzyTermsEnum's scale factor for a real scaled lev score
        score = boost / e.getScaleFactor() + e.getMinSimilarity();
      } else {
        UnicodeUtil.UTF8toUTF16(candidateTerm, spare);
        termAsString = spare.toString();
        score = distance.getDistance(term.text(), termAsString);
      }

View Full Code Here

      atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
    Terms terms = MultiFields.getTerms(ir, term.field());
    if (terms == null) {
      return Collections.emptyList();
    }
    FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance-1), true);
    final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();
    
    BytesRef queryTerm = new BytesRef(term.text());
    BytesRef candidateTerm;
    ScoreTerm st = new ScoreTerm();
    BoostAttribute boostAtt =
      e.attributes().addAttribute(BoostAttribute.class);
    while ((candidateTerm = e.next()) != null) {
      final float boost = boostAtt.getBoost();
      // ignore uncompetitive hits
      if (stQueue.size() >= numSug && boost <= stQueue.peek().boost)
        continue;
      
      // ignore exact match of the same term
      if (queryTerm.bytesEquals(candidateTerm))
        continue;
      
      int df = e.docFreq();
      
      // check docFreq if required
      if (df <= docfreq)
        continue;
      
      final float score;
      final String termAsString;
      if (distance == INTERNAL_LEVENSHTEIN) {
        // delay creating strings until the end
        termAsString = null;
        // undo FuzzyTermsEnum's scale factor for a real scaled lev score
        score = boost / e.getScaleFactor() + e.getMinSimilarity();
      } else {
        UnicodeUtil.UTF8toUTF16(candidateTerm, spare);
        termAsString = spare.toString();
        score = distance.getDistance(term.text(), termAsString);
      }

View Full Code Here

      atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
    Terms terms = MultiFields.getTerms(ir, term.field());
    if (terms == null) {
      return Collections.emptyList();
    }
    FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance-1), true);
    final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<>();
    
    BytesRef queryTerm = new BytesRef(term.text());
    BytesRef candidateTerm;
    ScoreTerm st = new ScoreTerm();
    BoostAttribute boostAtt =
      e.attributes().addAttribute(BoostAttribute.class);
    while ((candidateTerm = e.next()) != null) {
      final float boost = boostAtt.getBoost();
      // ignore uncompetitive hits
      if (stQueue.size() >= numSug && boost <= stQueue.peek().boost)
        continue;
      
      // ignore exact match of the same term
      if (queryTerm.bytesEquals(candidateTerm))
        continue;
      
      int df = e.docFreq();
      
      // check docFreq if required
      if (df <= docfreq)
        continue;
      
      final float score;
      final String termAsString;
      if (distance == INTERNAL_LEVENSHTEIN) {
        // delay creating strings until the end
        termAsString = null;
        // undo FuzzyTermsEnum's scale factor for a real scaled lev score
        score = boost / e.getScaleFactor() + e.getMinSimilarity();
      } else {
        UnicodeUtil.UTF8toUTF16(candidateTerm, spare);
        termAsString = spare.toString();
        score = distance.getDistance(term.text(), termAsString);
      }

View Full Code Here

TOP

Related Classes of org.apache.lucene.search.FuzzyTermsEnum$LevenshteinAutomataAttribute

org.apache.lucene.index.TermsEnum

org.apache.lucene.search.spell.DirectSpellChecker

org.apache.lucene.util.automaton.Automaton

org.apache.lucene.util.automaton.CompiledAutomaton

org.apache.lucene.util.automaton.LevenshteinAutomata

org.apache.lucene.util.BytesRef

org.sindice.siren.search.node.NodeFuzzyQuery

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.