Examples of org.apache.lucene.index.TermsEnum

org.apache.lucene.index.TermsEnum
Iterator to seek ({@link #seekCeil(BytesRef)}, {@link #seekExact(BytesRef,boolean)}) or step through ({@link #next}) terms to obtain frequency information ({@link #docFreq}), {@link DocsEnum} or {@link DocsAndPositionsEnum} for the current term ({@link #docs}).
Term enumerations are always ordered by {@link #getComparator}. Each term in the enumeration is greater than the one before it.

The TermsEnum is unpositioned when you first obtain it and you must first successfully call {@link #next} or one of the seek methods. @lucene.experimental

      if (fieldTerms == null) {
        return null;
      }


      // Reuse single TermsEnum below:
      final TermsEnum te = fieldTerms.iterator(null);
      
      for (int i = 0; i < terms.size(); i++) {
        final Term t = terms.get(i);
        final TermState state = states[i].get(context.ord);
        if (state == null) { /* term doesnt exist in this segment */
          assert termNotInReader(reader, t): "no termstate found but term exists in reader";
          return null;
        }
        te.seekExact(t.bytes(), state);
        DocsAndPositionsEnum postingsEnum = te.docsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE);


        // PhraseQuery on a field that did not index
        // positions.
        if (postingsEnum == null) {
          assert te.seekExact(t.bytes()) : "termstate found but no term exists in reader";
          // term does exist, but has no positions
          throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")");
        }
        postingsFreqs[i] = new PostingsAndFreq(postingsEnum, te.docFreq(), positions.get(i).intValue(), t);
      }


      // sort by increasing docFreq order
      if (slop == 0) {
        ArrayUtil.timSort(postingsFreqs);

View Full Code Here

    @Override
    public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
      final SortedSetDocValues docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), query.field);
      // Cannot use FixedBitSet because we require long index (ord):
      final OpenBitSet termSet = new OpenBitSet(docTermOrds.getValueCount());
      TermsEnum termsEnum = query.getTermsEnum(new Terms() {
        
        @Override
        public Comparator<BytesRef> getComparator() {
          return BytesRef.getUTF8SortedAsUnicodeComparator();
        }
        
        @Override
        public TermsEnum iterator(TermsEnum reuse) {
          return docTermOrds.termsEnum();
        }


        @Override
        public long getSumTotalTermFreq() {
          return -1;
        }


        @Override
        public long getSumDocFreq() {
          return -1;
        }


        @Override
        public int getDocCount() {
          return -1;
        }


        @Override
        public long size() {
          return -1;
        }


        @Override
        public boolean hasFreqs() {
          return false;
        }


        @Override
        public boolean hasOffsets() {
          return false;
        }


        @Override
        public boolean hasPositions() {
          return false;
        }
        
        @Override
        public boolean hasPayloads() {
          return false;
        }
      });
      
      assert termsEnum != null;
      if (termsEnum.next() != null) {
        // fill into a OpenBitSet
        do {
          termSet.set(termsEnum.ord());
        } while (termsEnum.next() != null);
      } else {
        return null;
      }
      
      return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {

View Full Code Here

      int termOrd = 0;


      // TODO: use Uninvert?


      if (terms != null) {
        final TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docs = null;


        while(true) {
          final BytesRef term = termsEnum.next();
          if (term == null) {
            break;
          }
          if (termOrd >= termCountHardLimit) {
            break;
          }


          termOrdToBytesOffset.add(bytes.copyUsingLengthPrefix(term));
          docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
          while (true) {
            final int docID = docs.nextDoc();
            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
              break;
            }

View Full Code Here

      // pointer==0 means not set
      bytes.copyUsingLengthPrefix(new BytesRef());


      if (terms != null) {
        int termCount = 0;
        final TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docs = null;
        while(true) {
          if (termCount++ == termCountHardLimit) {
            // app is misusing the API (there is more than
            // one term per doc); in this case we make best
            // effort to load what we can (see LUCENE-2142)
            break;
          }


          final BytesRef term = termsEnum.next();
          if (term == null) {
            break;
          }
          final long pointer = bytes.copyUsingLengthPrefix(term);
          docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
          while (true) {
            final int docID = docs.nextDoc();
            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
              break;
            }

View Full Code Here

    @Override
    public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
      final SortedDocValues fcsi = FieldCache.DEFAULT.getTermsIndex(context.reader(), query.field);
      // Cannot use FixedBitSet because we require long index (ord):
      final OpenBitSet termSet = new OpenBitSet(fcsi.getValueCount());
      TermsEnum termsEnum = query.getTermsEnum(new Terms() {
        
        @Override
        public Comparator<BytesRef> getComparator() {
          return BytesRef.getUTF8SortedAsUnicodeComparator();
        }
        
        @Override
        public TermsEnum iterator(TermsEnum reuse) {
          return fcsi.termsEnum();
        }


        @Override
        public long getSumTotalTermFreq() {
          return -1;
        }


        @Override
        public long getSumDocFreq() {
          return -1;
        }


        @Override
        public int getDocCount() {
          return -1;
        }


        @Override
        public long size() {
          return -1;
        }


        @Override
        public boolean hasFreqs() {
          return false;
        }


        @Override
        public boolean hasOffsets() {
          return false;
        }


        @Override
        public boolean hasPositions() {
          return false;
        }
        
        @Override
        public boolean hasPayloads() {
          return false;
        }
      });
      
      assert termsEnum != null;
      if (termsEnum.next() != null) {
        // fill into a OpenBitSet
        do {
          long ord = termsEnum.ord();
          if (ord >= 0) {
            termSet.set(ord);
          }
        } while (termsEnum.next() != null);
      } else {
        return null;
      }
      
      return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {

View Full Code Here

    }
    
    private TermsEnum getTermsEnum(final IndexInput input) throws IOException {
      input.seek(bytes.offset);
      
      return new TermsEnum() {
        private long currentOrd = -1;
        // TODO: maxLength is negative when all terms are merged away...
        private final BytesRef termBuffer = new BytesRef(bytes.maxLength < 0 ? 0 : bytes.maxLength);
        private final BytesRef term = new BytesRef(); // TODO: paranoia?

View Full Code Here

            docsWithField = new Bits.MatchAllBits(maxDoc);
            setDocsWithField = false;
          }
        }


        final TermsEnum termsEnum = termsEnum(terms);


        DocsEnum docs = null;
        FixedBitSet docsWithField = null;
        while(true) {
          final BytesRef term = termsEnum.next();
          if (term == null) {
            break;
          }
          visitTerm(term);
          docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
          while (true) {
            final int docID = docs.nextDoc();
            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
              break;
            }

View Full Code Here

    final Map<String,Term> fieldTerms = new HashMap<String,Term>();
    for (AtomicReaderContext context : reader.leaves()) {
      for (CategoryListParams clp : fip.getAllCategoryListParams()) {
        Terms terms = context.reader().terms(clp.field);
        if (terms != null) {
          TermsEnum te = terms.iterator(null);
          BytesRef termBytes = null;
          while ((termBytes = te.next()) != null) {
            String term = termBytes.utf8ToString();
            if (term.startsWith(PAYLOAD_TERM_TEXT )) {
              if (term.equals(PAYLOAD_TERM_TEXT)) {
                fieldTerms.put(clp.field, new Term(clp.field, term));
              } else {

View Full Code Here

      try {
        DocsAndPositionsEnum dpe = null;
        if (fields != null) {
          Terms terms = fields.terms(term.field());
          if (terms != null) {
            TermsEnum te = terms.iterator(null); // no use for reusing
            if (te.seekExact(term.bytes())) {
              // we're not expected to be called for deleted documents
              dpe = te.docsAndPositions(null, null, DocsAndPositionsEnum.FLAG_PAYLOADS);
            }
          }
        }
        return dpe;
      } catch (IOException ioe) {

View Full Code Here

        assert termsDocCount <= maxDoc;
        if (termsDocCount == maxDoc) {
          // Fast case: all docs have this field:
          return new Bits.MatchAllBits(maxDoc);
        }
        final TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docs = null;
        while(true) {
          final BytesRef term = termsEnum.next();
          if (term == null) {
            break;
          }
          if (res == null) {
            // lazy init
            res = new FixedBitSet(maxDoc);
          }


          docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
          // TODO: use bulk API
          while (true) {
            final int docID = docs.nextDoc();
            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
              break;

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.lucene.index.TermsEnum

cc.twittertools.index.ExtractTermStatisticsFromIndex

com.basho.yokozuna.handler.EntropyData

com.browseengine.bobo.facets.data.FacetDataCache

com.browseengine.bobo.facets.data.MultiValueFacetDataCache

com.browseengine.bobo.facets.data.MultiValueWithWeightFacetDataCache

com.browseengine.bobo.facets.impl.CompactMultiValueFacetHandler

com.browseengine.bobo.sort.SortCollectorImpl

org.apache.blur.lucene.warmup.IndexTracer

org.apache.blur.manager.IndexManager

org.apache.blur.manager.writer.IndexImporter

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.