Examples of org.apache.lucene.index.Terms

org.apache.lucene.index.Terms
Access to the terms in a specific field. See {@link Fields}. @lucene.experimental

    final long sumTotalTermFreq;
    final long sumDocFreq;


    assert field != null;
    
    Terms terms = MultiFields.getTerms(reader, field);
    if (terms == null) {
      docCount = 0;
      sumTotalTermFreq = 0;
      sumDocFreq = 0;
    } else {
      docCount = terms.getDocCount();
      sumTotalTermFreq = terms.getSumTotalTermFreq();
      sumDocFreq = terms.getSumDocFreq();
    }
    return new CollectionStatistics(field, reader.maxDoc(), docCount, sumTotalTermFreq, sumDocFreq);
  }

View Full Code Here

      final AtomicReader reader = context.reader();
      final Bits liveDocs = acceptDocs;
      
      PhraseQuery.PostingsAndFreq[] postingsFreqs = new PhraseQuery.PostingsAndFreq[termArrays.size()];


      final Terms fieldTerms = reader.terms(field);
      if (fieldTerms == null) {
        return null;
      }


      // Reuse single TermsEnum below:
      final TermsEnum termsEnum = fieldTerms.iterator(null);


      for (int pos=0; pos<postingsFreqs.length; pos++) {
        Term[] terms = termArrays.get(pos);


        final DocsAndPositionsEnum postingsEnum;

View Full Code Here

   * @return the coefficient
   * @throws IOException If there are problems reading term vectors from the underlying Lucene index.
   */
  private double createCoefficient(int doc, Set<String> matchedTokens, String prefixToken) throws IOException {


    Terms tv = searcher.getIndexReader().getTermVector(doc, TEXT_FIELD_NAME);
    TermsEnum it = tv.iterator(TermsEnum.EMPTY);


    Integer position = Integer.MAX_VALUE;
    BytesRef term;
    // find the closest token position
    while ((term = it.next()) != null) {

View Full Code Here

   * MultiTermQuery semantics.
   */
  public void testRewriteSingleTerm() throws IOException {
    AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), BasicAutomata
        .makeString("piece"));
    Terms terms = MultiFields.getTerms(searcher.getIndexReader(), FN);
    assertTrue(aq.getTermsEnum(terms) instanceof SingleTermsEnum);
    assertEquals(1, automatonQueryNrHits(aq));
  }

View Full Code Here

    Automaton pfx = BasicAutomata.makeString("do");
    pfx.expandSingleton(); // expand singleton representation for testing
    Automaton prefixAutomaton = BasicOperations.concatenate(pfx, BasicAutomata
        .makeAnyString());
    AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), prefixAutomaton);
    Terms terms = MultiFields.getTerms(searcher.getIndexReader(), FN);
    assertTrue(aq.getTermsEnum(terms) instanceof PrefixTermsEnum);
    assertEquals(3, automatonQueryNrHits(aq));
  }

View Full Code Here

  public void testEmptyOptimization() throws IOException {
    AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), BasicAutomata
        .makeEmpty());
    // not yet available: assertTrue(aq.getEnum(searcher.getIndexReader())
    // instanceof EmptyTermEnum);
    Terms terms = MultiFields.getTerms(searcher.getIndexReader(), FN);
    assertSame(TermsEnum.EMPTY, aq.getTermsEnum(terms));
    assertEquals(0, automatonQueryNrHits(aq));
  }

View Full Code Here

        writer.addDocument(doc);
        count++;
      }
      reader = DirectoryReader.open(writer, false);


      Terms terms = MultiFields.getTerms(reader, "body");
      if (terms == null) {
        throw new IllegalArgumentException("need at least one suggestion");
      }


      // Move all ngrams into an FST:
      TermsEnum termsEnum = terms.iterator(null);


      Outputs<Long> outputs = PositiveIntOutputs.getSingleton();
      Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);


      IntsRef scratchInts = new IntsRef();

View Full Code Here

      final List<TermsEnum> termsEnums = new ArrayList<TermsEnum>();


      final IndexReader reader = searcher.getIndexReader();
      if (reader.maxDoc() > 0) {
        for (final AtomicReaderContext ctx : reader.leaves()) {
          Terms terms = ctx.reader().terms(F_WORD);
          if (terms != null)
            termsEnums.add(terms.iterator(null));
        }
      }
      
      boolean isEmpty = termsEnums.isEmpty();

View Full Code Here

    private final TermsEnum termsEnum;
    private int minNumDocs;
    private long freq;


    HighFrequencyIterator() throws IOException {
      Terms terms = MultiFields.getTerms(reader, field);
      if (terms != null) {
        termsEnum = terms.iterator(null);
      } else {
        termsEnum = null;
      }
      minNumDocs = (int)(thresh * (float)reader.numDocs());
    }

View Full Code Here

      throw new IOException("You must first call Classifier#train");
    }
    double max = - Double.MAX_VALUE;
    BytesRef foundClass = new BytesRef();


    Terms terms = MultiFields.getTerms(atomicReader, classFieldName);
    TermsEnum termsEnum = terms.iterator(null);
    BytesRef next;
    String[] tokenizedDoc = tokenizeDoc(inputDocument);
    while ((next = termsEnum.next()) != null) {
      double clVal = calculateLogPrior(next) + calculateLogLikelihood(tokenizedDoc, next);
      if (clVal > max) {

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.lucene.index.Terms

cc.twittertools.index.ExtractTermStatisticsFromIndex

com.basho.yokozuna.handler.EntropyData

com.browseengine.bobo.facets.data.FacetDataCache

com.browseengine.bobo.facets.data.MultiValueFacetDataCache

com.browseengine.bobo.facets.data.MultiValueWithWeightFacetDataCache

com.browseengine.bobo.facets.impl.CompactMultiValueFacetHandler

com.browseengine.bobo.sort.SortCollectorImpl

org.apache.blur.index.ExitableReader$ExitableFields

org.apache.blur.lucene.warmup.IndexWarmup

org.apache.blur.manager.IndexManager

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.