Package org.apache.lucene.index

Examples of org.apache.lucene.index.TermDocs


      if (te.docFreq() > 30) {
        File f = new File(output, term.text() + ".txt");
        PrintWriter pw = new PrintWriter(new FileWriter(f));
        System.err.printf("%s %d\n", term.text(), te.docFreq());
       
        TermDocs td = reader.termDocs(term);
        while (td.next()) {
          int doc = td.doc();
          buf.setLength(0);
          appendVectorTerms(buf, reader.getTermFreqVector(doc, "description-clustering"));
          appendVectorTerms(buf, reader.getTermFreqVector(doc, "extended-clustering"));
          emitTagDoc(term, pw, buf);
        }
View Full Code Here


    return ts;
  }
 
  public static long getTotalTermFreq(IndexReader reader, Term term) throws Exception {
    long totalTF = 0;
    TermDocs td = reader.termDocs(term);
    while (td.next()) {
      totalTF += td.freq();
    }
    return totalTF;
  }
View Full Code Here

    // Separately count how many tokens are actually in the index:
    IndexReader reader = IndexReader.open(benchmark.getRunData().getDirectory());
    assertEquals(NUM_DOCS, reader.numDocs());

    TermEnum terms = reader.terms();
    TermDocs termDocs = reader.termDocs();
    int totalTokenCount2 = 0;
    while(terms.next()) {
      termDocs.seek(terms.term());
      while(termDocs.next())
        totalTokenCount2 += termDocs.freq();
    }
    reader.close();

    // Make sure they are the same
    assertEquals(totalTokenCount1, totalTokenCount2);
View Full Code Here

    protected Object createValue(IndexReader reader, Entry entryKey)
        throws IOException {
      String field = StringHelper.intern((String) entryKey.field);
      final String[] retArray = new String[reader.maxDoc()];
      TermDocs termDocs = reader.termDocs();
      TermEnum termEnum = reader.terms (new Term (field));
      try {
        do {
          Term term = termEnum.term();
          if (term==null || term.field() != field) break;
          String termval = term.text();
          termDocs.seek (termEnum);
          while (termDocs.next()) {
            retArray[termDocs.doc()] = termval;
          }
        } while (termEnum.next());
      } finally {
        termDocs.close();
        termEnum.close();
      }
      return retArray;
    }
View Full Code Here

    protected Object createValue(IndexReader reader, Entry entryKey)
        throws IOException {
      String field = StringHelper.intern((String) entryKey.field);
      final int[] retArray = new int[reader.maxDoc()];
      String[] mterms = new String[reader.maxDoc()+1];
      TermDocs termDocs = reader.termDocs();
      TermEnum termEnum = reader.terms (new Term (field));
      int t = 0// current term number

      // an entry for documents that have no terms in this field
      // should a document with no terms be at top or bottom?
      // this puts them at the top - if it is changed, FieldDocSortedHitQueue
      // needs to change as well.
      mterms[t++] = null;

      try {
        do {
          Term term = termEnum.term();
          if (term==null || term.field() != field) break;

          // store term text
          // we expect that there is at most one term per document
          if (t >= mterms.length) throw new RuntimeException ("there are more terms than " +
                  "documents in field \"" + field + "\", but it's impossible to sort on " +
                  "tokenized fields");
          mterms[t] = term.text();

          termDocs.seek (termEnum);
          while (termDocs.next()) {
            retArray[termDocs.doc()] = t;
          }

          t++;
        } while (termEnum.next());
      } finally {
        termDocs.close();
        termEnum.close();
      }

      if (t == 0) {
        // if there are no terms, make the term array
View Full Code Here

        throws IOException {
      Entry entry = (Entry) entryKey;
      String field = entry.field;
      SortComparator comparator = (SortComparator) entry.custom;
      final Comparable[] retArray = new Comparable[reader.maxDoc()];
      TermDocs termDocs = reader.termDocs();
      TermEnum termEnum = reader.terms (new Term (field));
      try {
        do {
          Term term = termEnum.term();
          if (term==null || term.field() != field) break;
          Comparable termval = comparator.getComparable (term.text());
          termDocs.seek (termEnum);
          while (termDocs.next()) {
            retArray[termDocs.doc()] = termval;
          }
        } while (termEnum.next());
      } finally {
        termDocs.close();
        termEnum.close();
      }
      return retArray;
    }
View Full Code Here

      ByteParser parser = (ByteParser) entry.custom;
      if (parser == null) {
        return wrapper.getBytes(reader, field, FieldCache.DEFAULT_BYTE_PARSER);
      }
      final byte[] retArray = new byte[reader.maxDoc()];
      TermDocs termDocs = reader.termDocs();
      TermEnum termEnum = reader.terms (new Term (field));
      try {
        do {
          Term term = termEnum.term();
          if (term==null || term.field() != field) break;
          byte termval = parser.parseByte(term.text());
          termDocs.seek (termEnum);
          while (termDocs.next()) {
            retArray[termDocs.doc()] = termval;
          }
        } while (termEnum.next());
      } catch (StopFillCacheException stop) {
      } finally {
        termDocs.close();
        termEnum.close();
      }
      return retArray;
    }
View Full Code Here

      ShortParser parser = (ShortParser) entry.custom;
      if (parser == null) {
        return wrapper.getShorts(reader, field, FieldCache.DEFAULT_SHORT_PARSER);
      }
      final short[] retArray = new short[reader.maxDoc()];
      TermDocs termDocs = reader.termDocs();
      TermEnum termEnum = reader.terms (new Term (field));
      try {
        do {
          Term term = termEnum.term();
          if (term==null || term.field() != field) break;
          short termval = parser.parseShort(term.text());
          termDocs.seek (termEnum);
          while (termDocs.next()) {
            retArray[termDocs.doc()] = termval;
          }
        } while (termEnum.next());
      } catch (StopFillCacheException stop) {
      } finally {
        termDocs.close();
        termEnum.close();
      }
      return retArray;
    }
View Full Code Here

        } catch (NumberFormatException ne) {
          return wrapper.getInts(reader, field, NUMERIC_UTILS_INT_PARSER);     
        }
      }
      int[] retArray = null;
      TermDocs termDocs = reader.termDocs();
      TermEnum termEnum = reader.terms (new Term (field));
      try {
        do {
          Term term = termEnum.term();
          if (term==null || term.field() != field) break;
          int termval = parser.parseInt(term.text());
          if (retArray == null) // late init
            retArray = new int[reader.maxDoc()];
          termDocs.seek (termEnum);
          while (termDocs.next()) {
            retArray[termDocs.doc()] = termval;
          }
        } while (termEnum.next());
      } catch (StopFillCacheException stop) {
      } finally {
        termDocs.close();
        termEnum.close();
      }
      if (retArray == null) // no values
        retArray = new int[reader.maxDoc()];
      return retArray;
View Full Code Here

        } catch (NumberFormatException ne) {
          return wrapper.getFloats(reader, field, NUMERIC_UTILS_FLOAT_PARSER);     
        }
    }
      float[] retArray = null;
      TermDocs termDocs = reader.termDocs();
      TermEnum termEnum = reader.terms (new Term (field));
      try {
        do {
          Term term = termEnum.term();
          if (term==null || term.field() != field) break;
          float termval = parser.parseFloat(term.text());
          if (retArray == null) // late init
            retArray = new float[reader.maxDoc()];
          termDocs.seek (termEnum);
          while (termDocs.next()) {
            retArray[termDocs.doc()] = termval;
          }
        } while (termEnum.next());
      } catch (StopFillCacheException stop) {
      } finally {
        termDocs.close();
        termEnum.close();
      }
      if (retArray == null) // no values
        retArray = new float[reader.maxDoc()];
      return retArray;
View Full Code Here

TOP

Related Classes of org.apache.lucene.index.TermDocs

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.