Examples of docFreq()


Examples of org.apache.lucene.index.MultiReader.docFreq()

        int numDocs = reader.numDocs();
        terms = reader.terms();
        while(terms.next() && queryTerms.size() < 10000)
        {
          Term term = terms.term();
          int docFreq = reader.docFreq(term);
          if(docFreq * 100 >= numDocs) queryTerms.add(term.text());
        }
      }
      catch (IOException e)
      {
View Full Code Here

Examples of org.apache.lucene.index.TermContext.docFreq()

    for (int i = 0; i < queryTerms.length; i++) {
      TermContext termContext = contextArray[i];
      if (termContext == null) {
        lowFreq.add(new TermQuery(queryTerms[i]), lowFreqOccur);
      } else {
        if ((maxTermFrequency >= 1f && termContext.docFreq() > maxTermFrequency)
            || (termContext.docFreq() > (int) Math.ceil(maxTermFrequency
                * (float) maxDoc))) {
          highFreq
              .add(new TermQuery(queryTerms[i], termContext), highFreqOccur);
        } else {
View Full Code Here

Examples of org.apache.lucene.index.TermEnum.docFreq()

        break;
      }
      assertTrue(testTermEnum.next());

      assertEquals(aprioriTermEnum.term(), testTermEnum.term());
      assertTrue(aprioriTermEnum.docFreq() == testTermEnum.docFreq());

      // compare termDocs seeking

      TermDocs aprioriTermDocsSeeker = aprioriReader.termDocs(aprioriTermEnum.term());
      TermDocs testTermDocsSeeker = testReader.termDocs(testTermEnum.term());
View Full Code Here

Examples of org.apache.lucene.index.TermEnum.docFreq()

        break;
      }
      assertTrue(testTermEnum.next());

      assertEquals(aprioriTermEnum.term(), testTermEnum.term());
      assertTrue(aprioriTermEnum.docFreq() == testTermEnum.docFreq());

      // compare termDocs seeking

      TermDocs aprioriTermDocsSeeker = aprioriReader.termDocs(aprioriTermEnum.term());
      TermDocs testTermDocsSeeker = testReader.termDocs(testTermEnum.term());
View Full Code Here

Examples of org.apache.lucene.index.TermEnum.docFreq()

        }
      }
    }
    else {
      while (terms.next()) {
        tiq.insertWithOverflow(new TermInfo(terms.term(), terms.docFreq()));
      }
    }
    while (tiq.size() != 0) {
      TermInfo termInfo = tiq.pop();
      System.out.println(termInfo.term + " " + termInfo.docFreq);
View Full Code Here

Examples of org.apache.lucene.index.TermEnum.docFreq()

    while (terms.next()) {
      Term term = terms.term();
      //message(term.field() + ":" + term.text() + " freq:" + terms.docFreq());
      //if we're either not looking by field or we're matching the specific field
      if ((field == null) || field.equals(term.field()))
        termMap.put(term.field() + ":" + term.text(), Integer.valueOf((terms.docFreq())));
    }

    Iterator<String> termIterator = termMap.keySet().iterator();
    for (int ii = 0; termIterator.hasNext() && ii < 100; ii++) {
      String termDetails = termIterator.next();
View Full Code Here

Examples of org.apache.lucene.index.TermEnum.docFreq()

      if (fields == null || fields.contains(termEnum.term().field())) { // todo skipto if not using field
        InstantiatedTerm instantiatedTerm = new InstantiatedTerm(termEnum.term().field(), termEnum.term().text());
        getTermsByFieldAndText().get(termEnum.term().field()).put(termEnum.term().text(), instantiatedTerm);
        instantiatedTerm.setTermIndex(terms.size());
        terms.add(instantiatedTerm);
        instantiatedTerm.setAssociatedDocuments(new InstantiatedTermDocumentInformation[termEnum.docFreq()]);
      }
    }
    termEnum.close();
    orderedTerms = terms.toArray(new InstantiatedTerm[terms.size()]);
View Full Code Here

Examples of org.apache.lucene.index.TermEnum.docFreq()

    Term term = te.term();
    while (term != null) {
      if (term.field() != internedFieldName) {
        break;
      }
      if (te.docFreq() > maxDocFreq) {
        stopWords.add(term.text());
      }
      if (!te.next()) {
        break;
      }
View Full Code Here

Examples of org.apache.lucene.index.TermEnum.docFreq()

        if( tiq == null ) {
          tiq = new TopTermQueue( numTerms+1 );
          info.put( field, tiq );
        }
        tiq.distinctTerms++;
        tiq.histogram.add( terms.docFreq() )// add the term to the histogram
       
        // Only save the distinct terms for fields we worry about
        if (fields != null && fields.size() > 0) {
          if( !fields.contains( field ) ) {
            continue;
View Full Code Here

Examples of org.apache.lucene.index.TermEnum.docFreq()

        }
        if( junkWords != null && junkWords.contains( t ) ) {
          continue;
        }
       
        if( terms.docFreq() > tiq.minFreq ) {
          tiq.add(new TopTermQueue.TermInfo(terms.term(), terms.docFreq()));
            if (tiq.size() > numTerms) { // if tiq full
            tiq.pop(); // remove lowest in tiq
            tiq.minFreq = ((TopTermQueue.TermInfo)tiq.top()).docFreq; // reset minFreq
          }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.