Package org.apache.lucene.index

Examples of org.apache.lucene.index.TermEnum.docFreq()


      int result = 1;
      currentTerm = currentTerm.createTerm(term);
      try {
        TermEnum termEnum = reader.terms(currentTerm);
        if (termEnum != null && termEnum.term().equals(currentTerm)) {
          result = termEnum.docFreq();
        }
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
      return result;
View Full Code Here


              // If we are past the upper term, or equal to it when the upper bound is excluded, then stop.
              if (upperCmp>0 || (upperCmp==0 && !upperIncl)) break;
            }

            // This is a good term in the range.  Check if mincount/maxcount conditions are satisfied.
            int docFreq = termEnum.docFreq();
            if (docFreq >= freqmin && docFreq <= freqmax) {
              // add the term to the list
              String label = raw ? indexedText : ft.indexedToReadable(indexedText);
              if (sort) {
                queue.add(new CountPair<String, Integer>(label, docFreq));
View Full Code Here

        if( tiq == null ) {
          tiq = new TopTermQueue( numTerms+1 );
          info.put( field, tiq );
        }
        tiq.distinctTerms++;
        tiq.histogram.add( terms.docFreq() )// add the term to the histogram
       
        // Only save the distinct terms for fields we worry about
        if (fields != null && fields.size() > 0) {
          if( !fields.contains( field ) ) {
            continue;
View Full Code Here

        }
        if( junkWords != null && junkWords.contains( t ) ) {
          continue;
        }
       
        if( terms.docFreq() > tiq.minFreq ) {
          tiq.add(new TopTermQueue.TermInfo(terms.term(), terms.docFreq()));
            if (tiq.size() > numTerms) { // if tiq full
            tiq.pop(); // remove lowest in tiq
            tiq.minFreq = ((TopTermQueue.TermInfo)tiq.top()).docFreq; // reset minFreq
          }
View Full Code Here

        if( junkWords != null && junkWords.contains( t ) ) {
          continue;
        }
       
        if( terms.docFreq() > tiq.minFreq ) {
          tiq.add(new TopTermQueue.TermInfo(terms.term(), terms.docFreq()));
            if (tiq.size() > numTerms) { // if tiq full
            tiq.pop(); // remove lowest in tiq
            tiq.minFreq = ((TopTermQueue.TermInfo)tiq.top()).docFreq; // reset minFreq
          }
        }
View Full Code Here

    Term term = te.term();
    while (term != null) {
      if (term.field() != internedFieldName) {
        break;
      }
      if (te.docFreq() > maxDocFreq) {
        stopWords.add(term.text());
      }
      if (!te.next()) {
        break;
      }
View Full Code Here

        if( tiq == null ) {
          tiq = new TopTermQueue( numTerms+1 );
          info.put( field, tiq );
        }
        tiq.distinctTerms++;
        tiq.histogram.add( terms.docFreq() )// add the term to the histogram
       
        // Only save the distinct terms for fields we worry about
        if (fields != null && fields.size() > 0) {
          if( !fields.contains( field ) ) {
            continue;
View Full Code Here

        }
        if( junkWords != null && junkWords.contains( t ) ) {
          continue;
        }
       
        if( terms.docFreq() > tiq.minFreq ) {
          tiq.put(new TopTermQueue.TermInfo(terms.term(), terms.docFreq()));
            if (tiq.size() > numTerms) { // if tiq full
            tiq.pop(); // remove lowest in tiq
            tiq.minFreq = ((TopTermQueue.TermInfo)tiq.top()).docFreq; // reset minFreq
          }
View Full Code Here

        if( junkWords != null && junkWords.contains( t ) ) {
          continue;
        }
       
        if( terms.docFreq() > tiq.minFreq ) {
          tiq.put(new TopTermQueue.TermInfo(terms.term(), terms.docFreq()));
            if (tiq.size() > numTerms) { // if tiq full
            tiq.pop(); // remove lowest in tiq
            tiq.minFreq = ((TopTermQueue.TermInfo)tiq.top()).docFreq; // reset minFreq
          }
        }
View Full Code Here

    TermFreqQueue tiq = new TermFreqQueue(count);
    TermEnum terms = reader.terms();
     
    int minFreq = 0;
    while (terms.next()) {
      if (terms.docFreq() > minFreq) {
        tiq.put(new TermFreq(terms.term(), terms.docFreq()));
        if (tiq.size() >= count) {                 // if tiq overfull
          tiq.pop();                              // remove lowest in tiq
          minFreq = ((TermFreq)tiq.top()).docFreq; // reset minFreq
        }
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.