Package org.apache.lucene.search

Examples of org.apache.lucene.search.MaxNonCompetitiveBoostAttribute


   */
  protected Collection<ScoreTerm> suggestSimilar(Term term, int numSug, IndexReader ir, int docfreq, int editDistance,
                                                 float accuracy, final CharsRef spare) throws IOException {
   
    AttributeSource atts = new AttributeSource();
    MaxNonCompetitiveBoostAttribute maxBoostAtt =
      atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
    Terms terms = MultiFields.getTerms(ir, term.field());
    if (terms == null) {
      return Collections.emptyList();
    }
    FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance-1), true);
    final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();
   
    BytesRef queryTerm = new BytesRef(term.text());
    BytesRef candidateTerm;
    ScoreTerm st = new ScoreTerm();
    BoostAttribute boostAtt =
      e.attributes().addAttribute(BoostAttribute.class);
    while ((candidateTerm = e.next()) != null) {
      final float boost = boostAtt.getBoost();
      // ignore uncompetitive hits
      if (stQueue.size() >= numSug && boost <= stQueue.peek().boost)
        continue;
     
      // ignore exact match of the same term
      if (queryTerm.bytesEquals(candidateTerm))
        continue;
     
      int df = e.docFreq();
     
      // check docFreq if required
      if (df <= docfreq)
        continue;
     
      final float score;
      final String termAsString;
      if (distance == INTERNAL_LEVENSHTEIN) {
        // delay creating strings until the end
        termAsString = null;
        // undo FuzzyTermsEnum's scale factor for a real scaled lev score
        score = boost / e.getScaleFactor() + e.getMinSimilarity();
      } else {
        UnicodeUtil.UTF8toUTF16(candidateTerm, spare);
        termAsString = spare.toString();
        score = distance.getDistance(term.text(), termAsString);
      }
     
      if (score < accuracy)
        continue;
     
      // add new entry in PQ
      st.term = BytesRef.deepCopyOf(candidateTerm);
      st.boost = boost;
      st.docfreq = df;
      st.termAsString = termAsString;
      st.score = score;
      stQueue.offer(st);
      // possibly drop entries from queue
      st = (stQueue.size() > numSug) ? stQueue.poll() : new ScoreTerm();
      maxBoostAtt.setMaxNonCompetitiveBoost((stQueue.size() >= numSug) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY);
    }
     
    return stQueue;
  }
View Full Code Here


   */
  protected Collection<ScoreTerm> suggestSimilar(Term term, int numSug, IndexReader ir, int docfreq, int editDistance,
                                                 float accuracy, final CharsRef spare) throws IOException {
   
    AttributeSource atts = new AttributeSource();
    MaxNonCompetitiveBoostAttribute maxBoostAtt =
      atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
    Terms terms = MultiFields.getTerms(ir, term.field());
    if (terms == null) {
      return Collections.emptyList();
    }
    FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance-1), true);
    final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();
   
    BytesRef queryTerm = new BytesRef(term.text());
    BytesRef candidateTerm;
    ScoreTerm st = new ScoreTerm();
    BoostAttribute boostAtt =
      e.attributes().addAttribute(BoostAttribute.class);
    while ((candidateTerm = e.next()) != null) {
      final float boost = boostAtt.getBoost();
      // ignore uncompetitive hits
      if (stQueue.size() >= numSug && boost <= stQueue.peek().boost)
        continue;
     
      // ignore exact match of the same term
      if (queryTerm.bytesEquals(candidateTerm))
        continue;
     
      int df = e.docFreq();
     
      // check docFreq if required
      if (df <= docfreq)
        continue;
     
      final float score;
      final String termAsString;
      if (distance == INTERNAL_LEVENSHTEIN) {
        // delay creating strings until the end
        termAsString = null;
        // undo FuzzyTermsEnum's scale factor for a real scaled lev score
        score = boost / e.getScaleFactor() + e.getMinSimilarity();
      } else {
        UnicodeUtil.UTF8toUTF16(candidateTerm, spare);
        termAsString = spare.toString();
        score = distance.getDistance(term.text(), termAsString);
      }
     
      if (score < accuracy)
        continue;
     
      // add new entry in PQ
      st.term = BytesRef.deepCopyOf(candidateTerm);
      st.boost = boost;
      st.docfreq = df;
      st.termAsString = termAsString;
      st.score = score;
      stQueue.offer(st);
      // possibly drop entries from queue
      st = (stQueue.size() > numSug) ? stQueue.poll() : new ScoreTerm();
      maxBoostAtt.setMaxNonCompetitiveBoost((stQueue.size() >= numSug) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY);
    }
     
    return stQueue;
  }
View Full Code Here

   */
  protected Collection<ScoreTerm> suggestSimilar(Term term, int numSug, IndexReader ir, int docfreq, int editDistance,
                                                 float accuracy, final CharsRef spare) throws IOException {
   
    AttributeSource atts = new AttributeSource();
    MaxNonCompetitiveBoostAttribute maxBoostAtt =
      atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
    Terms terms = MultiFields.getTerms(ir, term.field());
    if (terms == null) {
      return Collections.emptyList();
    }
    FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance-1), true);
    final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<>();
   
    BytesRef queryTerm = new BytesRef(term.text());
    BytesRef candidateTerm;
    ScoreTerm st = new ScoreTerm();
    BoostAttribute boostAtt =
      e.attributes().addAttribute(BoostAttribute.class);
    while ((candidateTerm = e.next()) != null) {
      final float boost = boostAtt.getBoost();
      // ignore uncompetitive hits
      if (stQueue.size() >= numSug && boost <= stQueue.peek().boost)
        continue;
     
      // ignore exact match of the same term
      if (queryTerm.bytesEquals(candidateTerm))
        continue;
     
      int df = e.docFreq();
     
      // check docFreq if required
      if (df <= docfreq)
        continue;
     
      final float score;
      final String termAsString;
      if (distance == INTERNAL_LEVENSHTEIN) {
        // delay creating strings until the end
        termAsString = null;
        // undo FuzzyTermsEnum's scale factor for a real scaled lev score
        score = boost / e.getScaleFactor() + e.getMinSimilarity();
      } else {
        UnicodeUtil.UTF8toUTF16(candidateTerm, spare);
        termAsString = spare.toString();
        score = distance.getDistance(term.text(), termAsString);
      }
     
      if (score < accuracy)
        continue;
     
      // add new entry in PQ
      st.term = BytesRef.deepCopyOf(candidateTerm);
      st.boost = boost;
      st.docfreq = df;
      st.termAsString = termAsString;
      st.score = score;
      stQueue.offer(st);
      // possibly drop entries from queue
      st = (stQueue.size() > numSug) ? stQueue.poll() : new ScoreTerm();
      maxBoostAtt.setMaxNonCompetitiveBoost((stQueue.size() >= numSug) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY);
    }
     
    return stQueue;
  }
View Full Code Here

TOP

Related Classes of org.apache.lucene.search.MaxNonCompetitiveBoostAttribute

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.