Package org.apache.lucene.index

Examples of org.apache.lucene.index.TermsEnum


      if (fieldTerms == null) {
        return null;
      }

      // Reuse single TermsEnum below:
      final TermsEnum te = fieldTerms.iterator(null);
     
      for (int i = 0; i < terms.size(); i++) {
        final Term t = terms.get(i);
        final TermState state = states[i].get(context.ord);
        if (state == null) { /* term doesnt exist in this segment */
          assert termNotInReader(reader, t): "no termstate found but term exists in reader";
          return null;
        }
        te.seekExact(t.bytes(), state);
        DocsAndPositionsEnum postingsEnum = te.docsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE);

        // PhraseQuery on a field that did not index
        // positions.
        if (postingsEnum == null) {
          assert te.seekExact(t.bytes()) : "termstate found but no term exists in reader";
          // term does exist, but has no positions
          throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")");
        }
        postingsFreqs[i] = new PostingsAndFreq(postingsEnum, te.docFreq(), positions.get(i).intValue(), t);
      }

      // sort by increasing docFreq order
      if (slop == 0) {
        ArrayUtil.timSort(postingsFreqs);
View Full Code Here


    @Override
    public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
      final SortedSetDocValues docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), query.field);
      // Cannot use FixedBitSet because we require long index (ord):
      final OpenBitSet termSet = new OpenBitSet(docTermOrds.getValueCount());
      TermsEnum termsEnum = query.getTermsEnum(new Terms() {
       
        @Override
        public Comparator<BytesRef> getComparator() {
          return BytesRef.getUTF8SortedAsUnicodeComparator();
        }
       
        @Override
        public TermsEnum iterator(TermsEnum reuse) {
          return docTermOrds.termsEnum();
        }

        @Override
        public long getSumTotalTermFreq() {
          return -1;
        }

        @Override
        public long getSumDocFreq() {
          return -1;
        }

        @Override
        public int getDocCount() {
          return -1;
        }

        @Override
        public long size() {
          return -1;
        }

        @Override
        public boolean hasFreqs() {
          return false;
        }

        @Override
        public boolean hasOffsets() {
          return false;
        }

        @Override
        public boolean hasPositions() {
          return false;
        }
       
        @Override
        public boolean hasPayloads() {
          return false;
        }
      });
     
      assert termsEnum != null;
      if (termsEnum.next() != null) {
        // fill into a OpenBitSet
        do {
          termSet.set(termsEnum.ord());
        } while (termsEnum.next() != null);
      } else {
        return null;
      }
     
      return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
View Full Code Here

      int termOrd = 0;

      // TODO: use Uninvert?

      if (terms != null) {
        final TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docs = null;

        while(true) {
          final BytesRef term = termsEnum.next();
          if (term == null) {
            break;
          }
          if (termOrd >= termCountHardLimit) {
            break;
          }

          termOrdToBytesOffset.add(bytes.copyUsingLengthPrefix(term));
          docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
          while (true) {
            final int docID = docs.nextDoc();
            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
              break;
            }
View Full Code Here

      // pointer==0 means not set
      bytes.copyUsingLengthPrefix(new BytesRef());

      if (terms != null) {
        int termCount = 0;
        final TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docs = null;
        while(true) {
          if (termCount++ == termCountHardLimit) {
            // app is misusing the API (there is more than
            // one term per doc); in this case we make best
            // effort to load what we can (see LUCENE-2142)
            break;
          }

          final BytesRef term = termsEnum.next();
          if (term == null) {
            break;
          }
          final long pointer = bytes.copyUsingLengthPrefix(term);
          docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
          while (true) {
            final int docID = docs.nextDoc();
            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
              break;
            }
View Full Code Here

    @Override
    public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
      final SortedDocValues fcsi = FieldCache.DEFAULT.getTermsIndex(context.reader(), query.field);
      // Cannot use FixedBitSet because we require long index (ord):
      final OpenBitSet termSet = new OpenBitSet(fcsi.getValueCount());
      TermsEnum termsEnum = query.getTermsEnum(new Terms() {
       
        @Override
        public Comparator<BytesRef> getComparator() {
          return BytesRef.getUTF8SortedAsUnicodeComparator();
        }
       
        @Override
        public TermsEnum iterator(TermsEnum reuse) {
          return fcsi.termsEnum();
        }

        @Override
        public long getSumTotalTermFreq() {
          return -1;
        }

        @Override
        public long getSumDocFreq() {
          return -1;
        }

        @Override
        public int getDocCount() {
          return -1;
        }

        @Override
        public long size() {
          return -1;
        }

        @Override
        public boolean hasFreqs() {
          return false;
        }

        @Override
        public boolean hasOffsets() {
          return false;
        }

        @Override
        public boolean hasPositions() {
          return false;
        }
       
        @Override
        public boolean hasPayloads() {
          return false;
        }
      });
     
      assert termsEnum != null;
      if (termsEnum.next() != null) {
        // fill into a OpenBitSet
        do {
          long ord = termsEnum.ord();
          if (ord >= 0) {
            termSet.set(ord);
          }
        } while (termsEnum.next() != null);
      } else {
        return null;
      }
     
      return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
View Full Code Here

    }
   
    private TermsEnum getTermsEnum(final IndexInput input) throws IOException {
      input.seek(bytes.offset);
     
      return new TermsEnum() {
        private long currentOrd = -1;
        // TODO: maxLength is negative when all terms are merged away...
        private final BytesRef termBuffer = new BytesRef(bytes.maxLength < 0 ? 0 : bytes.maxLength);
        private final BytesRef term = new BytesRef(); // TODO: paranoia?
View Full Code Here

            docsWithField = new Bits.MatchAllBits(maxDoc);
            setDocsWithField = false;
          }
        }

        final TermsEnum termsEnum = termsEnum(terms);

        DocsEnum docs = null;
        FixedBitSet docsWithField = null;
        while(true) {
          final BytesRef term = termsEnum.next();
          if (term == null) {
            break;
          }
          visitTerm(term);
          docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
          while (true) {
            final int docID = docs.nextDoc();
            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
              break;
            }
View Full Code Here

    final Map<String,Term> fieldTerms = new HashMap<String,Term>();
    for (AtomicReaderContext context : reader.leaves()) {
      for (CategoryListParams clp : fip.getAllCategoryListParams()) {
        Terms terms = context.reader().terms(clp.field);
        if (terms != null) {
          TermsEnum te = terms.iterator(null);
          BytesRef termBytes = null;
          while ((termBytes = te.next()) != null) {
            String term = termBytes.utf8ToString();
            if (term.startsWith(PAYLOAD_TERM_TEXT )) {
              if (term.equals(PAYLOAD_TERM_TEXT)) {
                fieldTerms.put(clp.field, new Term(clp.field, term));
              } else {
View Full Code Here

      try {
        DocsAndPositionsEnum dpe = null;
        if (fields != null) {
          Terms terms = fields.terms(term.field());
          if (terms != null) {
            TermsEnum te = terms.iterator(null); // no use for reusing
            if (te.seekExact(term.bytes())) {
              // we're not expected to be called for deleted documents
              dpe = te.docsAndPositions(null, null, DocsAndPositionsEnum.FLAG_PAYLOADS);
            }
          }
        }
        return dpe;
      } catch (IOException ioe) {
View Full Code Here

        assert termsDocCount <= maxDoc;
        if (termsDocCount == maxDoc) {
          // Fast case: all docs have this field:
          return new Bits.MatchAllBits(maxDoc);
        }
        final TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docs = null;
        while(true) {
          final BytesRef term = termsEnum.next();
          if (term == null) {
            break;
          }
          if (res == null) {
            // lazy init
            res = new FixedBitSet(maxDoc);
          }

          docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
          // TODO: use bulk API
          while (true) {
            final int docID = docs.nextDoc();
            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
              break;
View Full Code Here

TOP

Related Classes of org.apache.lucene.index.TermsEnum

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.