Package org.apache.lucene.index

Examples of org.apache.lucene.index.TermEnum.docFreq()


    assertEquals(200, termEnum.docFreq());
    // go to the second term (bbb)
    termEnum.next();
    // assert that term is 'bbb'
    assertEquals("bbb", termEnum.term().text());
    assertEquals(100, termEnum.docFreq());

    termEnum.close();


    // create enumeration of terms after term 'aaa', including 'aaa'
View Full Code Here


    // create enumeration of terms after term 'aaa', including 'aaa'
    termEnum = reader.terms(new Term("content", "aaa"));
    // assert that term is 'aaa'
    assertEquals("aaa", termEnum.term().text());
    assertEquals(200, termEnum.docFreq());
    // go to term 'bbb'
    termEnum.next();
    // assert that term is 'bbb'
    assertEquals("bbb", termEnum.term().text());
    assertEquals(100, termEnum.docFreq());
View Full Code Here

    assertEquals(200, termEnum.docFreq());
    // go to term 'bbb'
    termEnum.next();
    // assert that term is 'bbb'
    assertEquals("bbb", termEnum.term().text());
    assertEquals(100, termEnum.docFreq());

    termEnum.close();
  }

  private void addDoc(IndexWriter writer, String value) throws IOException
View Full Code Here

        if( tiq == null ) {
          tiq = new TopTermQueue( numTerms+1 );
          info.put( field, tiq );
        }
        tiq.distinctTerms++;
        tiq.histogram.add( terms.docFreq() ); // add the term to the histogram
       
        // Only save the distinct terms for fields we worry about
        if (fields != null && fields.size() > 0) {
          if( !fields.contains( field ) ) {
            continue;
View Full Code Here

        }
        if( junkWords != null && junkWords.contains( t ) ) {
          continue;
        }
       
        if( terms.docFreq() > tiq.minFreq ) {
          tiq.put(new TopTermQueue.TermInfo(terms.term(), terms.docFreq()));
            if (tiq.size() > numTerms) { // if tiq full
            tiq.pop(); // remove lowest in tiq
            tiq.minFreq = ((TopTermQueue.TermInfo)tiq.top()).docFreq; // reset minFreq
          }
View Full Code Here

        if( junkWords != null && junkWords.contains( t ) ) {
          continue;
        }
       
        if( terms.docFreq() > tiq.minFreq ) {
          tiq.put(new TopTermQueue.TermInfo(terms.term(), terms.docFreq()));
            if (tiq.size() > numTerms) { // if tiq full
            tiq.pop(); // remove lowest in tiq
            tiq.minFreq = ((TopTermQueue.TermInfo)tiq.top()).docFreq; // reset minFreq
          }
        }
View Full Code Here

        IndexReader reader = IndexReader.open(FSDirectory.open(new File(dir)), true);
        TermEnum terms = reader.terms();
       
        while (terms.next()) { // &&  (terms.docFreq() > 2)) {
            if (terms.docFreq() > 1000) {
              System.out.print( terms.term().text());
              System.out.print(" -- ");
              System.out.println(terms.docFreq());
            }
        }
View Full Code Here

       
        while (terms.next()) { // &&  (terms.docFreq() > 2)) {
            if (terms.docFreq() > 1000) {
              System.out.print( terms.term().text());
              System.out.print(" -- ");
              System.out.println(terms.docFreq());
            }
        }
       
       
       // out.writeStartDocument();
View Full Code Here

    Term term = te.term();
    while (term != null) {
      if (term.field() != internedFieldName) {
        break;
      }
      if (te.docFreq() > maxDocFreq) {
        stopWords.add(term.text());
      }
      if (!te.next()) {
        break;
      }
View Full Code Here

    TermFreqQueue tiq = new TermFreqQueue(count);
    TermEnum terms = reader.terms();
     
    int minFreq = 0;
    while (terms.next()) {
      if (terms.docFreq() > minFreq) {
        tiq.put(new TermFreq(terms.term(), terms.docFreq()));
        if (tiq.size() >= count) {                 // if tiq overfull
          tiq.pop();                              // remove lowest in tiq
          minFreq = ((TermFreq)tiq.top()).docFreq; // reset minFreq
        }
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.