Package org.apache.lucene.search

Examples of org.apache.lucene.search.FuzzyTermsEnum$LevenshteinAutomataAttribute


  @Override
  protected TermsEnum getTermsEnum(final Terms terms, final AttributeSource atts) throws IOException {
    if (maxEdits == 0 || prefixLength >= term.text().length()) {  // can only match if it's exact
      return new SingleTermsEnum(terms.iterator(null), term.bytes());
    }
    return new FuzzyTermsEnum(terms, atts, this.getTerm(), maxEdits, prefixLength, transpositions);
  }
View Full Code Here


      atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
    Terms terms = MultiFields.getTerms(ir, term.field());
    if (terms == null) {
      return Collections.emptyList();
    }
    FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance-1), true);
    final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();
   
    BytesRef queryTerm = new BytesRef(term.text());
    BytesRef candidateTerm;
    ScoreTerm st = new ScoreTerm();
    BoostAttribute boostAtt =
      e.attributes().addAttribute(BoostAttribute.class);
    while ((candidateTerm = e.next()) != null) {
      final float boost = boostAtt.getBoost();
      // ignore uncompetitive hits
      if (stQueue.size() >= numSug && boost <= stQueue.peek().boost)
        continue;
     
      // ignore exact match of the same term
      if (queryTerm.bytesEquals(candidateTerm))
        continue;
     
      int df = e.docFreq();
     
      // check docFreq if required
      if (df <= docfreq)
        continue;
     
      final float score;
      final String termAsString;
      if (distance == INTERNAL_LEVENSHTEIN) {
        // delay creating strings until the end
        termAsString = null;
        // undo FuzzyTermsEnum's scale factor for a real scaled lev score
        score = boost / e.getScaleFactor() + e.getMinSimilarity();
      } else {
        UnicodeUtil.UTF8toUTF16(candidateTerm, spare);
        termAsString = spare.toString();
        score = distance.getDistance(term.text(), termAsString);
      }
View Full Code Here

      atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
    Terms terms = MultiFields.getTerms(ir, term.field());
    if (terms == null) {
      return Collections.emptyList();
    }
    FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance-1), true);
    final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();
   
    BytesRef queryTerm = new BytesRef(term.text());
    BytesRef candidateTerm;
    ScoreTerm st = new ScoreTerm();
    BoostAttribute boostAtt =
      e.attributes().addAttribute(BoostAttribute.class);
    while ((candidateTerm = e.next()) != null) {
      final float boost = boostAtt.getBoost();
      // ignore uncompetitive hits
      if (stQueue.size() >= numSug && boost <= stQueue.peek().boost)
        continue;
     
      // ignore exact match of the same term
      if (queryTerm.bytesEquals(candidateTerm))
        continue;
     
      int df = e.docFreq();
     
      // check docFreq if required
      if (df <= docfreq)
        continue;
     
      final float score;
      final String termAsString;
      if (distance == INTERNAL_LEVENSHTEIN) {
        // delay creating strings until the end
        termAsString = null;
        // undo FuzzyTermsEnum's scale factor for a real scaled lev score
        score = boost / e.getScaleFactor() + e.getMinSimilarity();
      } else {
        UnicodeUtil.UTF8toUTF16(candidateTerm, spare);
        termAsString = spare.toString();
        score = distance.getDistance(term.text(), termAsString);
      }
View Full Code Here

      atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
    Terms terms = MultiFields.getTerms(ir, term.field());
    if (terms == null) {
      return Collections.emptyList();
    }
    FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance-1), true);
    final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<>();
   
    BytesRef queryTerm = new BytesRef(term.text());
    BytesRef candidateTerm;
    ScoreTerm st = new ScoreTerm();
    BoostAttribute boostAtt =
      e.attributes().addAttribute(BoostAttribute.class);
    while ((candidateTerm = e.next()) != null) {
      final float boost = boostAtt.getBoost();
      // ignore uncompetitive hits
      if (stQueue.size() >= numSug && boost <= stQueue.peek().boost)
        continue;
     
      // ignore exact match of the same term
      if (queryTerm.bytesEquals(candidateTerm))
        continue;
     
      int df = e.docFreq();
     
      // check docFreq if required
      if (df <= docfreq)
        continue;
     
      final float score;
      final String termAsString;
      if (distance == INTERNAL_LEVENSHTEIN) {
        // delay creating strings until the end
        termAsString = null;
        // undo FuzzyTermsEnum's scale factor for a real scaled lev score
        score = boost / e.getScaleFactor() + e.getMinSimilarity();
      } else {
        UnicodeUtil.UTF8toUTF16(candidateTerm, spare);
        termAsString = spare.toString();
        score = distance.getDistance(term.text(), termAsString);
      }
View Full Code Here

TOP

Related Classes of org.apache.lucene.search.FuzzyTermsEnum$LevenshteinAutomataAttribute

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.