Examples of DocsAndPositionsEnum


Examples of org.apache.lucene.index.DocsAndPositionsEnum

    }
    PriorityQueue<OffsetsEnum> pq = new PriorityQueue<OffsetsEnum>();
    float weights[] = new float[terms.length];
    // initialize postings
    for (int i = 0; i < terms.length; i++) {
      DocsAndPositionsEnum de = postings[i];
      int pDoc;
      if (de == EMPTY) {
        continue;
      } else if (de == null) {
        postings[i] = EMPTY; // initially
        if (!termsEnum.seekExact(terms[i])) {
          continue; // term not found
        }
        de = postings[i] = termsEnum.docsAndPositions(null, null, DocsAndPositionsEnum.FLAG_OFFSETS);
        if (de == null) {
          // no positions available
          throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
        }
        pDoc = de.advance(doc);
      } else {
        pDoc = de.docID();
        if (pDoc < doc) {
          pDoc = de.advance(doc);
        }
      }

      if (doc == pDoc) {
        weights[i] = scorer.weight(contentLength, de.freq());
        de.nextPosition();
        pq.add(new OffsetsEnum(de, i));
      }
    }
   
    pq.add(new OffsetsEnum(EMPTY, Integer.MAX_VALUE)); // a sentinel for termination
   
    PriorityQueue<Passage> passageQueue = new PriorityQueue<Passage>(n, new Comparator<Passage>() {
      @Override
      public int compare(Passage left, Passage right) {
        if (left.score < right.score) {
          return -1;
        } else if (left.score > right.score) {
          return 1;
        } else {
          return left.startOffset - right.startOffset;
        }
      }
    });
    Passage current = new Passage();
   
    OffsetsEnum off;
    while ((off = pq.poll()) != null) {
      final DocsAndPositionsEnum dp = off.dp;
      int start = dp.startOffset();
      if (start == -1) {
        throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
      }
      int end = dp.endOffset();
      // LUCENE-5166: this hit would span the content limit... however more valid
      // hits may exist (they are sorted by start). so we pretend like we never
      // saw this term, it won't cause a passage to be added to passageQueue or anything.
      assert EMPTY.startOffset() == Integer.MAX_VALUE;
      if (start < contentLength && end > contentLength) {
        continue;
      }
      if (start >= current.endOffset) {
        if (current.startOffset >= 0) {
          // finalize current
          current.score *= scorer.norm(current.startOffset);
          // new sentence: first add 'current' to queue
          if (passageQueue.size() == n && current.score < passageQueue.peek().score) {
            current.reset(); // can't compete, just reset it
          } else {
            passageQueue.offer(current);
            if (passageQueue.size() > n) {
              current = passageQueue.poll();
              current.reset();
            } else {
              current = new Passage();
            }
          }
        }
        // if we exceed limit, we are done
        if (start >= contentLength) {
          Passage passages[] = new Passage[passageQueue.size()];
          passageQueue.toArray(passages);
          for (Passage p : passages) {
            p.sort();
          }
          // sort in ascending order
          Arrays.sort(passages, new Comparator<Passage>() {
            @Override
            public int compare(Passage left, Passage right) {
              return left.startOffset - right.startOffset;
            }
          });
          return passages;
        }
        // advance breakiterator
        assert BreakIterator.DONE < 0;
        current.startOffset = Math.max(bi.preceding(start+1), 0);
        current.endOffset = Math.min(bi.next(), contentLength);
      }
      int tf = 0;
      while (true) {
        tf++;
        BytesRef term = terms[off.id];
        if (term == null) {
          // multitermquery match, pull from payload
          term = off.dp.getPayload();
          assert term != null;
        }
        current.addMatch(start, end, term);
        if (off.pos == dp.freq()) {
          break; // removed from pq
        } else {
          off.pos++;
          dp.nextPosition();
          start = dp.startOffset();
          end = dp.endOffset();
        }
        if (start >= current.endOffset || end > contentLength) {
          pq.offer(off);
          break;
        }
View Full Code Here

Examples of org.apache.lucene.index.DocsAndPositionsEnum

      return;
    }

    final CharsRef spare = new CharsRef();
    final TermsEnum termsEnum = vector.iterator(null);
    DocsAndPositionsEnum dpEnum = null;
    BytesRef text;
   
    int numDocs = reader.maxDoc();
   
    while ((text = termsEnum.next()) != null) {
      UnicodeUtil.UTF8toUTF16(text, spare);
      final String term = spare.toString();
      if (!termSet.contains(term)) {
        continue;
      }
      dpEnum = termsEnum.docsAndPositions(null, dpEnum);
      if (dpEnum == null) {
        // null snippet
        return;
      }

      dpEnum.nextDoc();
     
      // For weight look here: http://lucene.apache.org/core/3_6_0/api/core/org/apache/lucene/search/DefaultSimilarity.html
      final float weight = ( float ) ( Math.log( numDocs / ( double ) ( reader.docFreq( new Term(fieldName, text) ) + 1 ) ) + 1.0 );

      final int freq = dpEnum.freq();
     
      for(int i = 0;i < freq;i++) {
        int pos = dpEnum.nextPosition();
        if (dpEnum.startOffset() < 0) {
          return; // no offsets, null snippet
        }
        termList.add( new TermInfo( term, dpEnum.startOffset(), dpEnum.endOffset(), pos, weight ) );
      }
    }
   
    // sort by position
    Collections.sort(termList);
View Full Code Here

Examples of org.apache.lucene.index.DocsAndPositionsEnum

    // TODO: we could use CachingWrapperFilter, (or consume twice) to allow us to have a true freq()
    // but this would have a performance cost for likely little gain in the user experience, it
    // would only serve to make this method less bogus.
    // instead, we always return freq() = Integer.MAX_VALUE and let PH terminate based on offset...
   
    return new DocsAndPositionsEnum() {
      int currentDoc = -1;
      int currentMatch = -1;
      int currentStartOffset = -1;
      int currentEndOffset = -1;
      TokenStream stream = ts;
View Full Code Here

Examples of org.apache.lucene.index.DocsAndPositionsEnum

          final boolean offsets = iwTerms.hasOffsets() && memTerms.hasOffsets();
        
          while(iwTermsIter.next() != null) {
            assertNotNull(memTermsIter.next());
            assertEquals(iwTermsIter.term(), memTermsIter.term());
            DocsAndPositionsEnum iwDocsAndPos = iwTermsIter.docsAndPositions(null, null);
            DocsAndPositionsEnum memDocsAndPos = memTermsIter.docsAndPositions(null, null);
            while(iwDocsAndPos.nextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) {
              assertEquals(iwDocsAndPos.docID(), memDocsAndPos.nextDoc());
              assertEquals(iwDocsAndPos.freq(), memDocsAndPos.freq());
              for (int i = 0; i < iwDocsAndPos.freq(); i++) {
                assertEquals("term: " + iwTermsIter.term().utf8ToString(), iwDocsAndPos.nextPosition(), memDocsAndPos.nextPosition());
                if (offsets) {
                  assertEquals(iwDocsAndPos.startOffset(), memDocsAndPos.startOffset());
                  assertEquals(iwDocsAndPos.endOffset(), memDocsAndPos.endOffset());
                }
              }
             
            }
           
          }
        } else {
          while(iwTermsIter.next() != null) {
            assertEquals(iwTermsIter.term(), memTermsIter.term());
            DocsEnum iwDocsAndPos = iwTermsIter.docs(null, null);
            DocsEnum memDocsAndPos = memTermsIter.docs(null, null);
            while(iwDocsAndPos.nextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) {
              assertEquals(iwDocsAndPos.docID(), memDocsAndPos.nextDoc());
              assertEquals(iwDocsAndPos.freq(), memDocsAndPos.freq());
            }
          }
        }
      }
     
View Full Code Here

Examples of org.apache.lucene.index.DocsAndPositionsEnum

    MemoryIndex memory = new MemoryIndex(true,  random().nextInt(50) * 1024 * 1024);
    for (int i = 0; i < numIters; i++) { // check reuse
      memory.addField("foo", "bar", analyzer);
      AtomicReader reader = (AtomicReader) memory.createSearcher().getIndexReader();
      assertEquals(1, reader.terms("foo").getSumTotalTermFreq());
      DocsAndPositionsEnum disi = reader.termPositionsEnum(new Term("foo", "bar"));
      int docid = disi.docID();
      assertEquals(-1, docid);
      assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
      assertEquals(0, disi.nextPosition());
      assertEquals(0, disi.startOffset());
      assertEquals(3, disi.endOffset());
     
      // now reuse and check again
      TermsEnum te = reader.terms("foo").iterator(null);
      assertTrue(te.seekExact(new BytesRef("bar")));
      disi = te.docsAndPositions(null, disi);
      docid = disi.docID();
      assertEquals(-1, docid);
      assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
      reader.close();
      memory.reset();
    }
  }
View Full Code Here

Examples of org.apache.lucene.index.DocsAndPositionsEnum

    while (termEnum.next() != null) {
      assertNotNull(memTermEnum.next());
      assertThat(termEnum.totalTermFreq(), equalTo(memTermEnum.totalTermFreq()));

      DocsAndPositionsEnum docsPosEnum = termEnum.docsAndPositions(null, null, 0);
      DocsAndPositionsEnum memDocsPosEnum = memTermEnum.docsAndPositions(null, null, 0);
      String currentTerm = termEnum.term().utf8ToString();

      assertThat("Token mismatch for field: " + field_name, currentTerm, equalTo(memTermEnum.term().utf8ToString()));

      docsPosEnum.nextDoc();
      memDocsPosEnum.nextDoc();

      int freq = docsPosEnum.freq();
      assertThat(freq, equalTo(memDocsPosEnum.freq()));
      for (int i = 0; i < freq; i++) {
        String failDesc = " (field:" + field_name + " term:" + currentTerm + ")";
        int memPos = memDocsPosEnum.nextPosition();
        int pos = docsPosEnum.nextPosition();
        assertThat("Position test failed" + failDesc, memPos, equalTo(pos));
        assertThat("Start offset test failed" + failDesc, memDocsPosEnum.startOffset(), equalTo(docsPosEnum.startOffset()));
        assertThat("End offset test failed" + failDesc, memDocsPosEnum.endOffset(), equalTo(docsPosEnum.endOffset()));
        assertThat("Missing payload test failed" + failDesc, docsPosEnum.getPayload(), equalTo(null));
      }
    }
    assertNull("Still some tokens not processed", memTermEnum.next());
  }
View Full Code Here

Examples of org.apache.lucene.index.DocsAndPositionsEnum

          case 1: posFlags = DocsAndPositionsEnum.FLAG_OFFSETS; break;
          case 2: posFlags = DocsAndPositionsEnum.FLAG_PAYLOADS; break;
          default: posFlags = DocsAndPositionsEnum.FLAG_OFFSETS | DocsAndPositionsEnum.FLAG_PAYLOADS; break;
        }
        // TODO: cast to DocsAndPositionsEnum?
        DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null, posFlags);
        if (docsAndPositions != null) {
          return docsAndPositions;
        }
      }
      flags |= DocsEnum.FLAG_FREQS;
View Full Code Here

Examples of org.apache.lucene.index.DocsAndPositionsEnum

    try {
      TermsEnum termsEnum = terms.iterator(null);
      int sampleCount = 0;
      while (termsEnum.next() != null) {
        if (_hasPositions || _hasOffsets || _hasPayloads) {
          DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(_liveDocs, null);
          int nextDoc;
          do {
            nextDoc = docsAndPositions.nextDoc();
            int freq = docsAndPositions.freq();
            for (int i = 0; i < freq; i++) {
              docsAndPositions.nextPosition();
              if (_hasPayloads) {
                docsAndPositions.getPayload();
              }
              if (traceComplete()) {
                return getResult();
              }
            }
View Full Code Here

Examples of org.apache.lucene.index.DocsAndPositionsEnum

   
  private Map<Integer,String> highlightField(String field, String contents[], BreakIterator bi, Term terms[], int[] docids, List<AtomicReaderContext> leaves, int maxPassages) throws IOException
    Map<Integer,String> highlights = new HashMap<Integer,String>();
   
    // reuse in the real sense... for docs in same segment we just advance our old enum
    DocsAndPositionsEnum postings[] = null;
    TermsEnum termsEnum = null;
    int lastLeaf = -1;
   
    for (int i = 0; i < docids.length; i++) {
      String content = contents[i];
View Full Code Here

Examples of org.apache.lucene.index.DocsAndPositionsEnum

      TermsEnum termsEnum, DocsAndPositionsEnum[] postings, int n) throws IOException {
    PriorityQueue<OffsetsEnum> pq = new PriorityQueue<OffsetsEnum>();
    float weights[] = new float[terms.length];
    // initialize postings
    for (int i = 0; i < terms.length; i++) {
      DocsAndPositionsEnum de = postings[i];
      int pDoc;
      if (de == EMPTY) {
        continue;
      } else if (de == null) {
        postings[i] = EMPTY; // initially
        if (!termsEnum.seekExact(terms[i].bytes(), true)) {
          continue; // term not found
        }
        de = postings[i] = termsEnum.docsAndPositions(null, null, DocsAndPositionsEnum.FLAG_OFFSETS);
        if (de == null) {
          // no positions available
          throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
        }
        pDoc = de.advance(doc);
      } else {
        pDoc = de.docID();
        if (pDoc < doc) {
          pDoc = de.advance(doc);
        }
      }

      if (doc == pDoc) {
        weights[i] = scorer.weight(contentLength, de.freq());
        de.nextPosition();
        pq.add(new OffsetsEnum(de, i));
      }
    }
   
    pq.add(new OffsetsEnum(EMPTY, Integer.MAX_VALUE)); // a sentinel for termination
   
    PriorityQueue<Passage> passageQueue = new PriorityQueue<Passage>(n, new Comparator<Passage>() {
      @Override
      public int compare(Passage left, Passage right) {
        if (left.score < right.score) {
          return -1;
        } else if (left.score > right.score) {
          return 1;
        } else {
          return left.startOffset - right.startOffset;
        }
      }
    });
    Passage current = new Passage();
   
    OffsetsEnum off;
    while ((off = pq.poll()) != null) {
      final DocsAndPositionsEnum dp = off.dp;
      int start = dp.startOffset();
      if (start == -1) {
        throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
      }
      int end = dp.endOffset();
      if (start >= current.endOffset) {
        if (current.startOffset >= 0) {
          // finalize current
          current.score *= scorer.norm(current.startOffset);
          // new sentence: first add 'current' to queue
          if (passageQueue.size() == n && current.score < passageQueue.peek().score) {
            current.reset(); // can't compete, just reset it
          } else {
            passageQueue.offer(current);
            if (passageQueue.size() > n) {
              current = passageQueue.poll();
              current.reset();
            } else {
              current = new Passage();
            }
          }
        }
        // if we exceed limit, we are done
        if (start >= contentLength) {
          Passage passages[] = new Passage[passageQueue.size()];
          passageQueue.toArray(passages);
          for (Passage p : passages) {
            p.sort();
          }
          // sort in ascending order
          Arrays.sort(passages, new Comparator<Passage>() {
            @Override
            public int compare(Passage left, Passage right) {
              return left.startOffset - right.startOffset;
            }
          });
          return passages;
        }
        // advance breakiterator
        assert BreakIterator.DONE < 0;
        current.startOffset = Math.max(bi.preceding(start+1), 0);
        current.endOffset = Math.min(bi.next(), contentLength);
      }
      int tf = 0;
      while (true) {
        tf++;
        current.addMatch(start, end, terms[off.id]);
        if (off.pos == dp.freq()) {
          break; // removed from pq
        } else {
          off.pos++;
          dp.nextPosition();
          start = dp.startOffset();
          end = dp.endOffset();
        }
        if (start >= current.endOffset) {
          pq.offer(off);
          break;
        }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.