Package org.apache.lucene.index

Examples of org.apache.lucene.index.TermEnum.docFreq()


    do {
      Term term = te.term();
      if (term == null || term.field().equals(field) == false) {
        break;
      }
      out.printf("%s %d\n", term.text(), te.docFreq());
    } while (te.next());
    te.close();
  }
 
  /** dump the values stored in the specified field for each document.
View Full Code Here


  /**
   * Computes a weight for {@code term} as the reciprocal of its document frequency.
   *
   * @param term the term to look up
   * @param reader the index reader whose term enumeration is consulted
   * @return {@code 1.0f / docFreq} when the enumeration obtained from
   *     {@code reader.terms(term)} is non-null and its current term equals {@code term};
   *     otherwise {@code 0}, after logging a warning
   * @throws IOException declared by the signature; presumably raised by the reader or
   *     enumeration calls -- confirm against the Lucene API
   */
  protected float calculateWeight(Term term, IndexReader reader) throws IOException {
    // If a term is not in the index, then its weight is 0.
    TermEnum termEnum = reader.terms(term);
    // The equals() check rejects an enumeration whose current term is not the requested one.
    // NOTE(review): termEnum is not closed anywhere in the visible part of this method --
    // confirm whether it should be closed before returning.
    if (termEnum != null && termEnum.term() != null && termEnum.term().equals(term)) {
      return 1.0f / termEnum.docFreq();
    } else {
      log.warn("Couldn't find doc freq for term {}", term);
      return 0;
    }
View Full Code Here

    do {
      Term term = te.term();
      if (term == null || term.field().equals(field) == false) {
        break;
      }
      System.err.printf("%s %d\n", term.text(), te.docFreq());
    } while (te.next());
    te.close();
  }
 
  public static void emitTextForTags(File file, File output) throws IOException {
View Full Code Here

      Term term = te.term();
      if (term == null || term.field().equals(field) == false) {
        break;
      }
     
      if (te.docFreq() > 30) {
        File f = new File(output, term.text() + ".txt");
        PrintWriter pw = new PrintWriter(new FileWriter(f));
        System.err.printf("%s %d\n", term.text(), te.docFreq());
       
        TermDocs td = reader.termDocs(term);
View Full Code Here

      }
     
      if (te.docFreq() > 30) {
        File f = new File(output, term.text() + ".txt");
        PrintWriter pw = new PrintWriter(new FileWriter(f));
        System.err.printf("%s %d\n", term.text(), te.docFreq());
       
        TermDocs td = reader.termDocs(term);
        while (td.next()) {
          int doc = td.doc();
          buf.setLength(0);
View Full Code Here

      if (terms != null && terms.term() != null) {
        do {
          if (!terms.term().field().equals(field)) {
            break;
          }
          tiq.insertWithOverflow(new TermStats(terms.term(), terms.docFreq()));
        } while (terms.next());
      } else {
        System.out.println("No terms for field \"" + field + "\"");
      }
    } else {
View Full Code Here

        System.out.println("No terms for field \"" + field + "\"");
      }
    } else {
      TermEnum terms = reader.terms();
      while (terms.next()) {
        tiq.insertWithOverflow(new TermStats(terms.term(), terms.docFreq()));
      }
    }

    TermStats[] result = new TermStats[tiq.size()];
View Full Code Here

      if (fields == null || fields.contains(termEnum.term().field())) { // todo skipto if not using field
        InstantiatedTerm instantiatedTerm = new InstantiatedTerm(termEnum.term().field(), termEnum.term().text());
        getTermsByFieldAndText().get(termEnum.term().field()).put(termEnum.term().text(), instantiatedTerm);
        instantiatedTerm.setTermIndex(terms.size());
        terms.add(instantiatedTerm);
        instantiatedTerm.setAssociatedDocuments(new InstantiatedTermDocumentInformation[termEnum.docFreq()]);
      }
    }
    termEnum.close();
    orderedTerms = terms.toArray(new InstantiatedTerm[terms.size()]);
View Full Code Here

                FuzzyTermEnum fe=new FuzzyTermEnum(reader,startTerm,f.minSimilarity,f.prefixLength);
                TermEnum origEnum = reader.terms(startTerm);
                int df=0;
                if(startTerm.equals(origEnum.term()))
                {
                    df=origEnum.docFreq(); //store the df so all variants use same idf
                }
                int numVariants=0;
                int totalVariantDocFreqs=0;
                do
                {
View Full Code Here

      */
      final int output;
      if (storeOrd) {
        output = ord;
      } else {
        output = termEnum.docFreq();
      }
      //System.out.println("ADD: " + term.text() + " ch[0]=" + (term.text().length() == 0 ? -1 : term.text().charAt(0)));
      builder.add(toIntsRef(term.text()), outputs.get(output));
      ord++;
      if (ord % 100000 == 0 && LuceneTestCase.TEST_NIGHTLY) {
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.