Package org.apache.lucene.index

Examples of org.apache.lucene.index.TermsEnum


    _overflow = false;

    Terms terms = reader.terms(field);
    if (terms != null) {
      TermsEnum termsEnum = terms.iterator(null);
      BytesRef text;
      while ((text = termsEnum.next()) != null) {
        String strText = text.utf8ToString();
        list.add(strText);

        Term term = new Term(field, strText);
        DocsEnum docsEnum = reader.termDocsEnum(term);
View Full Code Here


    _overflow = false;

    Terms terms = reader.terms(field);
    if (terms != null) {
      TermsEnum termsEnum = terms.iterator(null);
      BytesRef text;
      while ((text = termsEnum.next()) != null) {
        String strText = text.utf8ToString();
        list.add(strText);

        Term term = new Term(field, strText);
        DocsEnum docsEnum = reader.termDocsEnum(term);
View Full Code Here

  private void countTerms() throws Exception {
    termCounts = new HashMap<String,FieldTermCount>();
    numTerms = 0;
    Fields fields = MultiFields.getFields(reader);   
    TermsEnum te = null;
    for (String fld : fields) {
      FieldTermCount ftc = new FieldTermCount();
      ftc.fieldname = fld;
      Terms terms = fields.terms(fld);
      if (terms != null) { // count terms
        te = terms.iterator(te);
        while (te.next() != null) {
          ftc.termCount++;
          numTerms++;
        }
      }
      termCounts.put(fld, ftc);
View Full Code Here

* Utility class to make it easier to handle term vectors.
*/
public class TermVectorMapper {

  public static List<IntPair> map(Terms terms, TermsEnum reuse, boolean acceptTermsOnly, boolean convertOffsets) throws IOException {
    TermsEnum te = terms.iterator(reuse);
    DocsAndPositionsEnum dpe = null;
    List<IntPair> res = new ArrayList<IntPair>();
    while (te.next() != null) {
      DocsAndPositionsEnum newDpe = te.docsAndPositions(null, dpe, DocsAndPositionsEnum.FLAG_OFFSETS);
      if (newDpe == null) { // no positions and no offsets - just add terms if allowed
        if (!acceptTermsOnly) {
          return null;
        }
        int freq = (int)te.totalTermFreq();
        if (freq == -1) freq = 0;
        res.add(new IntPair(freq, te.term().utf8ToString()));
        continue;
      }
      dpe = newDpe;
      // term vectors have only one document, number 0
      if (dpe.nextDoc() == DocsEnum.NO_MORE_DOCS) { // oops
        // treat this as no positions nor offsets
        int freq = (int)te.totalTermFreq();
        if (freq == -1) freq = 0;
        res.add(new IntPair(freq, te.term().utf8ToString()));
        continue;
      }
      IntPair ip = new IntPair(dpe.freq(), te.term().utf8ToString());
      for (int i = 0; i < dpe.freq(); i++) {
        int pos = dpe.nextPosition();
        if (pos != -1) {
          if (ip.positions == null) {
            ip.positions = new int[dpe.freq()];
View Full Code Here

   * @return TermStats[] ordered by terms with highest docFreq first.
   * @throws Exception
   */
  public static TermStats[] getHighFreqTerms(IndexReader reader, int numTerms, String[] fieldNames) throws Exception {
    TermStatsQueue tiq = null;
    TermsEnum te = null;
   
    if (fieldNames != null) {
      Fields fields = MultiFields.getFields(reader);
      if (fields == null) {
        LOG.info("Index with no fields - probably empty or corrupted");
View Full Code Here

  public Term getPrefix() { return prefix; }

  @Override
  protected TermsEnum getTermsEnum(final Terms terms, final AttributeSource atts)
  throws IOException {
    final TermsEnum tenum = terms.iterator(null);

    if (prefix.bytes().length == 0) {
      // no prefix -- match all terms for this field:
      return tenum;
    }
View Full Code Here

    public Scorer scorer(final AtomicReaderContext context,
                         final boolean scoreDocsInOrder,
                         final boolean topScorer, final Bits acceptDocs)
    throws IOException {
      assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
      final TermsEnum termsEnum = this.getTermsEnum(context);
      if (termsEnum == null) {
        return null;
      }

      final DocsAndPositionsEnum docsEnum = termsEnum.docsAndPositions(acceptDocs, null);
      final DocsNodesAndPositionsEnum sirenDocsEnum = NodeTermQuery.this.getDocsNodesAndPositionsEnum(docsEnum);
      return new NodeTermScorer(this, sirenDocsEnum, this.createDocScorer(context));
    }
View Full Code Here

      if (state == null) { // term is not present in that reader
        assert this.termNotInReader(context.reader(), term) : "no termstate found but term exists in reader term=" + term;
        return null;
      }
      //System.out.println("LD=" + reader.getLiveDocs() + " set?=" + (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null"));
      final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(null);
      termsEnum.seekExact(term.bytes(), state);
      return termsEnum;
    }
View Full Code Here

      if (terms == null) {
        // field does not exist
        continue;
      }

      final TermsEnum termsEnum = this.getTermsEnum(query, terms, collector.attributes);
      assert termsEnum != null;

      if (termsEnum == TermsEnum.EMPTY)
        continue;

      // Check comparator compatibility:
      final Comparator<BytesRef> newTermComp = termsEnum.getComparator();
      if (lastTermComp != null && newTermComp != null && newTermComp != lastTermComp)
        throw new RuntimeException("term comparator should not change between segments: "+lastTermComp+" != "+newTermComp);
      lastTermComp = newTermComp;
      collector.setReaderContext(topReaderContext, context);
      collector.setNextEnum(termsEnum);
      BytesRef bytes;
      while ((bytes = termsEnum.next()) != null) {
        if (!collector.collect(bytes))
          return; // interrupt whole term collection, so also don't iterate other subReaders
      }
    }
  }
View Full Code Here

      if (fieldTerms == null) {
        return null;
      }

      // Reuse single TermsEnum below:
      final TermsEnum te = fieldTerms.iterator(null);

      for (int i = 0; i < terms.size(); i++) {
        final Term t = terms.get(i);
        final TermState state = states[i].get(context.ord);
        if (state == null) { /* term doesnt exist in this segment */
          assert this.termNotInReader(reader, t): "no termstate found but term exists in reader";
          return null;
        }
        te.seekExact(t.bytes(), state);

        final DocsNodesAndPositionsEnum postingsEnum = NodePhraseQuery.this.getDocsNodesAndPositionsEnum(te.docsAndPositions(liveDocs, null));

        // PhraseQuery on a field that did not index positions (maybe not a siren field)
        if (postingsEnum == null) {
          assert te.seekExact(t.bytes(), false) : "termstate found but no term exists in reader";
          // term does exist, but has no positions
          throw new IllegalStateException("field \"" + t.field() + "\" was " +
              "indexed without position data; cannot run NodePhraseQuery " +
              "(term=" + t.text() + ")");
        }
View Full Code Here

TOP

Related Classes of org.apache.lucene.index.TermsEnum

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.