Package org.apache.lucene.index

Examples of org.apache.lucene.index.DocsAndPositionsEnum


        setOther(postings, reuse); // postings.other = reuse
      }
      return postings.reset(liveDocs, termState);
    } else {
      if (reuse instanceof PulsingDocsAndPositionsEnum) {
        DocsAndPositionsEnum wrapped = wrappedPostingsReader.docsAndPositions(field, termState.wrappedTermState, liveDocs, (DocsAndPositionsEnum) getOther(reuse),
                                                                              flags);
        setOther(wrapped, reuse); // wrapped.other = reuse
        return wrapped;
      } else {
        return wrappedPostingsReader.docsAndPositions(field, termState.wrappedTermState, liveDocs, reuse, flags);
View Full Code Here


      hasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) > 0;
      hasPayloads = fieldInfo.hasPayloads();

      BytesRef term;
      DocsEnum docsEnum = null;
      DocsAndPositionsEnum docsAndPositionsEnum = null;
      final TermsEnum termsEnum = termsIn.iterator(null);
      int termOffset = 0;

      final IntArrayWriter scratch = new IntArrayWriter();

      // Used for payloads, if any:
      final RAMOutputStream ros = new RAMOutputStream();

      // if (DEBUG) {
      //   System.out.println("\nLOAD terms seg=" + state.segmentInfo.name + " field=" + field + " hasOffsets=" + hasOffsets + " hasFreq=" + hasFreq + " hasPos=" + hasPos + " hasPayloads=" + hasPayloads);
      // }

      while ((term = termsEnum.next()) != null) {
        final int docFreq = termsEnum.docFreq();
        final long totalTermFreq = termsEnum.totalTermFreq();

        // if (DEBUG) {
        //   System.out.println("  term=" + term.utf8ToString());
        // }

        termOffsets[count] = termOffset;

        if (termBytes.length < (termOffset + term.length)) {
          termBytes = ArrayUtil.grow(termBytes, termOffset + term.length);
        }
        System.arraycopy(term.bytes, term.offset, termBytes, termOffset, term.length);
        termOffset += term.length;
        termOffsets[count+1] = termOffset;

        if (hasPos) {
          docsAndPositionsEnum = termsEnum.docsAndPositions(null, docsAndPositionsEnum);
        } else {
          docsEnum = termsEnum.docs(null, docsEnum);
        }

        final TermAndSkip ent;

        final DocsEnum docsEnum2;
        if (hasPos) {
          docsEnum2 = docsAndPositionsEnum;
        } else {
          docsEnum2 = docsEnum;
        }

        int docID;

        if (docFreq <= lowFreqCutoff) {

          ros.reset();

          // Pack postings for low-freq terms into a single int[]:
          while ((docID = docsEnum2.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
            scratch.add(docID);
            if (hasFreq) {
              final int freq = docsEnum2.freq();
              scratch.add(freq);
              if (hasPos) {
                for(int pos=0;pos<freq;pos++) {
                  scratch.add(docsAndPositionsEnum.nextPosition());
                  if (hasOffsets) {
                    scratch.add(docsAndPositionsEnum.startOffset());
                    scratch.add(docsAndPositionsEnum.endOffset());
                  }
                  if (hasPayloads) {
                    final BytesRef payload = docsAndPositionsEnum.getPayload();
                    if (payload != null) {
                      scratch.add(payload.length);
                      ros.writeBytes(payload.bytes, payload.offset, payload.length);
                    } else {
                      scratch.add(0);
                    }
                  }
                }
              }
            }
          }

          final byte[] payloads;
          if (hasPayloads) {
            ros.flush();
            payloads = new byte[(int) ros.length()];
            ros.writeTo(payloads, 0);
          } else {
            payloads = null;
          }

          final int[] postings = scratch.get();
       
          ent = new LowFreqTerm(postings, payloads, docFreq, (int) totalTermFreq);
        } else {
          final int[] docs = new int[docFreq];
          final int[] freqs;
          final int[][] positions;
          final byte[][][] payloads;
          if (hasFreq) {
            freqs = new int[docFreq];
            if (hasPos) {
              positions = new int[docFreq][];
              if (hasPayloads) {
                payloads = new byte[docFreq][][];
              } else {
                payloads = null;
              }
            } else {
              positions = null;
              payloads = null;
            }
          } else {
            freqs = null;
            positions = null;
            payloads = null;
          }

          // Use separate int[] for the postings for high-freq
          // terms:
          int upto = 0;
          while ((docID = docsEnum2.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
            docs[upto] = docID;
            if (hasFreq) {
              final int freq = docsEnum2.freq();
              freqs[upto] = freq;
              if (hasPos) {
                final int mult;
                if (hasOffsets) {
                  mult = 3;
                } else {
                  mult = 1;
                }
                if (hasPayloads) {
                  payloads[upto] = new byte[freq][];
                }
                positions[upto] = new int[mult*freq];
                int posUpto = 0;
                for(int pos=0;pos<freq;pos++) {
                  positions[upto][posUpto] = docsAndPositionsEnum.nextPosition();
                  if (hasPayloads) {
                    BytesRef payload = docsAndPositionsEnum.getPayload();
                    if (payload != null) {
                      byte[] payloadBytes = new byte[payload.length];
                      System.arraycopy(payload.bytes, payload.offset, payloadBytes, 0, payload.length);
                      payloads[upto][pos] = payloadBytes;
                    }
                  }
                  posUpto++;
                  if (hasOffsets) {
                    positions[upto][posUpto++] = docsAndPositionsEnum.startOffset();
                    positions[upto][posUpto++] = docsAndPositionsEnum.endOffset();
                  }
                }
              }
            }

View Full Code Here

    writer.close();
   

    IndexSearcher searcher = newSearcher(reader);
   
    DocsAndPositionsEnum pos = MultiFields.getTermPositionsEnum(searcher.getIndexReader(),
                                                                MultiFields.getLiveDocs(searcher.getIndexReader()),
                                                                "field",
                                                                new BytesRef("1"));
    pos.nextDoc();
    // first token should be at position 0
    assertEquals(0, pos.nextPosition());
   
    pos = MultiFields.getTermPositionsEnum(searcher.getIndexReader(),
                                           MultiFields.getLiveDocs(searcher.getIndexReader()),
                                           "field",
                                           new BytesRef("2"));
    pos.nextDoc();
    // second token should be at position 2
    assertEquals(2, pos.nextPosition());
   
    PhraseQuery q;
    ScoreDoc[] hits;

    q = new PhraseQuery();
View Full Code Here

    writer.addDocument(doc);

    final IndexReader readerFromWriter = writer.getReader();
    AtomicReader r = SlowCompositeReaderWrapper.wrap(readerFromWriter);

    DocsAndPositionsEnum tp = r.termPositionsEnum(new Term("content", "a"));
   
    int count = 0;
    assertTrue(tp.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    // "a" occurs 4 times
    assertEquals(4, tp.freq());
    assertEquals(0, tp.nextPosition());
    assertEquals(1, tp.nextPosition());
    assertEquals(3, tp.nextPosition());
    assertEquals(6, tp.nextPosition());

    // only one doc has "a"
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, tp.nextDoc());

    IndexSearcher is = newSearcher(readerFromWriter);
 
    SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
    SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
View Full Code Here

      Terms tvs = tvFields.terms(field);
      assertNotNull(tvs);
      assertEquals(2, tvs.size());
      TermsEnum tvsEnum = tvs.iterator(null);
      assertEquals(new BytesRef("abc"), tvsEnum.next());
      final DocsAndPositionsEnum dpEnum = tvsEnum.docsAndPositions(null, null);
      if (field.equals("tv")) {
        assertNull(dpEnum);
      } else {
        assertNotNull(dpEnum);
      }
View Full Code Here

        if (state == null) { /* term doesnt exist in this segment */
          assert termNotInReader(reader, t): "no termstate found but term exists in reader";
          return null;
        }
        te.seekExact(t.bytes(), state);
        DocsAndPositionsEnum postingsEnum = te.docsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE);

        // PhraseQuery on a field that did not index
        // positions.
        if (postingsEnum == null) {
          assert te.seekExact(t.bytes()) : "termstate found but no term exists in reader";
View Full Code Here

    private int curDocID = -1;
    private int lastRequestedDocID;

    private DocsAndPositionsEnum getDPE() {
      try {
        DocsAndPositionsEnum dpe = null;
        if (fields != null) {
          Terms terms = fields.terms(term.field());
          if (terms != null) {
            TermsEnum te = terms.iterator(null); // no use for reusing
            if (te.seekExact(term.bytes())) {
View Full Code Here

        return more || (freq != 0);
      }

      protected void processPayload(Similarity similarity) throws IOException {
        if (termSpans.isPayloadAvailable()) {
          final DocsAndPositionsEnum postings = termSpans.getPostings();
          payload = postings.getPayload();
          if (payload != null) {
            payloadScore = function.currentScore(doc, term.field(),
                                                 spans.start(), spans.end(), payloadsSeen, payloadScore,
                                                 docScorer.computePayloadFactor(doc, spans.start(), spans.end(), payload));
          } else {
View Full Code Here

    final AtomicReader reader = getOnlySegmentReader(writer.getReader());
    final Fields fields = reader.getTermVectors(0);
    final Terms terms = fields.terms("f");
    final TermsEnum te = terms.iterator(null);
    assertEquals(new BytesRef("a"), te.next());
    final DocsAndPositionsEnum dpe = te.docsAndPositions(null, null);
    assertEquals(0, dpe.nextDoc());
    assertEquals(2, dpe.freq());
    assertEquals(0, dpe.nextPosition());
    assertEquals(0, dpe.startOffset());
    final int endOffset = dpe.endOffset();
    assertEquals(1 + positionGap, dpe.nextPosition());
    assertEquals(1 + endOffset + offsetGap, dpe.endOffset());
    assertEquals(null, te.next());
    reader.close();
    writer.close();
    writer.w.getDirectory().close();
  }
View Full Code Here

          final boolean offsets = iwTerms.hasOffsets() && memTerms.hasOffsets();
        
          while(iwTermsIter.next() != null) {
            assertNotNull(memTermsIter.next());
            assertEquals(iwTermsIter.term(), memTermsIter.term());
            DocsAndPositionsEnum iwDocsAndPos = iwTermsIter.docsAndPositions(null, null);
            DocsAndPositionsEnum memDocsAndPos = memTermsIter.docsAndPositions(null, null);
            while(iwDocsAndPos.nextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) {
              assertEquals(iwDocsAndPos.docID(), memDocsAndPos.nextDoc());
              assertEquals(iwDocsAndPos.freq(), memDocsAndPos.freq());
              for (int i = 0; i < iwDocsAndPos.freq(); i++) {
                assertEquals("term: " + iwTermsIter.term().utf8ToString(), iwDocsAndPos.nextPosition(), memDocsAndPos.nextPosition());
                if (offsets) {
                  assertEquals(iwDocsAndPos.startOffset(), memDocsAndPos.startOffset());
                  assertEquals(iwDocsAndPos.endOffset(), memDocsAndPos.endOffset());
                }
              }
             
            }
           
          }
        } else {
          while(iwTermsIter.next() != null) {
            assertEquals(iwTermsIter.term(), memTermsIter.term());
            DocsEnum iwDocsAndPos = iwTermsIter.docs(null, null);
            DocsEnum memDocsAndPos = memTermsIter.docs(null, null);
            while(iwDocsAndPos.nextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) {
              assertEquals(iwDocsAndPos.docID(), memDocsAndPos.nextDoc());
              assertEquals(iwDocsAndPos.freq(), memDocsAndPos.freq());
            }
          }
        }
      }
     
View Full Code Here

TOP

Related Classes of org.apache.lucene.index.DocsAndPositionsEnum

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.