Package org.apache.lucene.index

Examples of org.apache.lucene.index.Terms


    final long sumTotalTermFreq;
    final long sumDocFreq;

    assert field != null;
   
    Terms terms = MultiFields.getTerms(reader, field);
    if (terms == null) {
      docCount = 0;
      sumTotalTermFreq = 0;
      sumDocFreq = 0;
    } else {
      docCount = terms.getDocCount();
      sumTotalTermFreq = terms.getSumTotalTermFreq();
      sumDocFreq = terms.getSumDocFreq();
    }
    return new CollectionStatistics(field, reader.maxDoc(), docCount, sumTotalTermFreq, sumDocFreq);
  }
View Full Code Here


      final AtomicReader reader = context.reader();
      final Bits liveDocs = acceptDocs;
     
      PhraseQuery.PostingsAndFreq[] postingsFreqs = new PhraseQuery.PostingsAndFreq[termArrays.size()];

      final Terms fieldTerms = reader.terms(field);
      if (fieldTerms == null) {
        return null;
      }

      // Reuse single TermsEnum below:
      final TermsEnum termsEnum = fieldTerms.iterator(null);

      for (int pos=0; pos<postingsFreqs.length; pos++) {
        Term[] terms = termArrays.get(pos);

        final DocsAndPositionsEnum postingsEnum;
View Full Code Here

   * @return the coefficient
   * @throws IOException If there are problems reading term vectors from the underlying Lucene index.
   */
  private double createCoefficient(int doc, Set<String> matchedTokens, String prefixToken) throws IOException {

    Terms tv = searcher.getIndexReader().getTermVector(doc, TEXT_FIELD_NAME);
    TermsEnum it = tv.iterator(TermsEnum.EMPTY);

    Integer position = Integer.MAX_VALUE;
    BytesRef term;
    // find the closest token position
    while ((term = it.next()) != null) {
View Full Code Here

   * MultiTermQuery semantics.
   */
  public void testRewriteSingleTerm() throws IOException {
    AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), BasicAutomata
        .makeString("piece"));
    Terms terms = MultiFields.getTerms(searcher.getIndexReader(), FN);
    assertTrue(aq.getTermsEnum(terms) instanceof SingleTermsEnum);
    assertEquals(1, automatonQueryNrHits(aq));
  }
View Full Code Here

    Automaton pfx = BasicAutomata.makeString("do");
    pfx.expandSingleton(); // expand singleton representation for testing
    Automaton prefixAutomaton = BasicOperations.concatenate(pfx, BasicAutomata
        .makeAnyString());
    AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), prefixAutomaton);
    Terms terms = MultiFields.getTerms(searcher.getIndexReader(), FN);
    assertTrue(aq.getTermsEnum(terms) instanceof PrefixTermsEnum);
    assertEquals(3, automatonQueryNrHits(aq));
  }
View Full Code Here

  public void testEmptyOptimization() throws IOException {
    AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), BasicAutomata
        .makeEmpty());
    // not yet available: assertTrue(aq.getEnum(searcher.getIndexReader())
    // instanceof EmptyTermEnum);
    Terms terms = MultiFields.getTerms(searcher.getIndexReader(), FN);
    assertSame(TermsEnum.EMPTY, aq.getTermsEnum(terms));
    assertEquals(0, automatonQueryNrHits(aq));
  }
View Full Code Here

        writer.addDocument(doc);
        count++;
      }
      reader = DirectoryReader.open(writer, false);

      Terms terms = MultiFields.getTerms(reader, "body");
      if (terms == null) {
        throw new IllegalArgumentException("need at least one suggestion");
      }

      // Move all ngrams into an FST:
      TermsEnum termsEnum = terms.iterator(null);

      Outputs<Long> outputs = PositiveIntOutputs.getSingleton();
      Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);

      IntsRef scratchInts = new IntsRef();
View Full Code Here

      final List<TermsEnum> termsEnums = new ArrayList<TermsEnum>();

      final IndexReader reader = searcher.getIndexReader();
      if (reader.maxDoc() > 0) {
        for (final AtomicReaderContext ctx : reader.leaves()) {
          Terms terms = ctx.reader().terms(F_WORD);
          if (terms != null)
            termsEnums.add(terms.iterator(null));
        }
      }
     
      boolean isEmpty = termsEnums.isEmpty();
View Full Code Here

    private final TermsEnum termsEnum;
    private int minNumDocs;
    private long freq;

    HighFrequencyIterator() throws IOException {
      Terms terms = MultiFields.getTerms(reader, field);
      if (terms != null) {
        termsEnum = terms.iterator(null);
      } else {
        termsEnum = null;
      }
      minNumDocs = (int)(thresh * (float)reader.numDocs());
    }
View Full Code Here

      throw new IOException("You must first call Classifier#train");
    }
    double max = - Double.MAX_VALUE;
    BytesRef foundClass = new BytesRef();

    Terms terms = MultiFields.getTerms(atomicReader, classFieldName);
    TermsEnum termsEnum = terms.iterator(null);
    BytesRef next;
    String[] tokenizedDoc = tokenizeDoc(inputDocument);
    while ((next = termsEnum.next()) != null) {
      double clVal = calculateLogPrior(next) + calculateLogLikelihood(tokenizedDoc, next);
      if (clVal > max) {
View Full Code Here

TOP

Related Classes of org.apache.lucene.index.Terms

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.