Examples of org.apache.lucene.index.DocsAndPositionsEnum

org.apache.lucene.index.DocsAndPositionsEnum
Also iterates through positions.

    // waste CPU merging them:
    _posList.clear();
    _doc = _queue.top().docID();


    // merge sort all positions together
    DocsAndPositionsEnum postings;
    do {
      postings = _queue.top();


      final int freq = postings.freq();
      for (int i = 0; i < freq; i++) {
        _posList.add(postings.nextPosition());
      }


      if (postings.nextDoc() != NO_MORE_DOCS) {
        _queue.updateTop();
      } else {
        _queue.pop();
      }
    } while (_queue.size() > 0 && _queue.top().docID() == _doc);

View Full Code Here

  }


  @Override
  public final int advance(int target) throws IOException {
    while (_queue.top() != null && target > _queue.top().docID()) {
      DocsAndPositionsEnum postings = _queue.pop();
      if (postings.advance(target) != NO_MORE_DOCS) {
        _queue.add(postings);
      }
    }
    return nextDoc();
  }

View Full Code Here

    DocsQueue(List<DocsAndPositionsEnum> docsEnums) throws IOException {
      super(docsEnums.size());


      Iterator<DocsAndPositionsEnum> i = docsEnums.iterator();
      while (i.hasNext()) {
        DocsAndPositionsEnum postings = i.next();
        if (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          add(postings);
        }
      }
    }

View Full Code Here

          case 1: posFlags = DocsAndPositionsEnum.FLAG_OFFSETS; break;
          case 2: posFlags = DocsAndPositionsEnum.FLAG_PAYLOADS; break;
          default: posFlags = DocsAndPositionsEnum.FLAG_OFFSETS | DocsAndPositionsEnum.FLAG_PAYLOADS; break;
        }
        // TODO: cast to DocsAndPositionsEnum?
        DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null, posFlags);
        if (docsAndPositions != null) {
          return docsAndPositions;
        }
      }
      flags |= DocsEnum.FLAG_FREQS;

View Full Code Here

    
  private Map<Integer,String> highlightField(String field, String contents[], BreakIterator bi, BytesRef terms[], int[] docids, List<AtomicReaderContext> leaves, int maxPassages) throws IOException {  
    Map<Integer,String> highlights = new HashMap<Integer,String>();
    
    // reuse in the real sense... for docs in same segment we just advance our old enum
    DocsAndPositionsEnum postings[] = null;
    TermsEnum termsEnum = null;
    int lastLeaf = -1;


    PassageFormatter fieldFormatter = getFormatter(field);
    if (fieldFormatter == null) {

View Full Code Here

    }
    PriorityQueue<OffsetsEnum> pq = new PriorityQueue<OffsetsEnum>();
    float weights[] = new float[terms.length];
    // initialize postings
    for (int i = 0; i < terms.length; i++) {
      DocsAndPositionsEnum de = postings[i];
      int pDoc;
      if (de == EMPTY) {
        continue;
      } else if (de == null) {
        postings[i] = EMPTY; // initially
        if (!termsEnum.seekExact(terms[i], true)) {
          continue; // term not found
        }
        de = postings[i] = termsEnum.docsAndPositions(null, null, DocsAndPositionsEnum.FLAG_OFFSETS);
        if (de == null) {
          // no positions available
          throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
        }
        pDoc = de.advance(doc);
      } else {
        pDoc = de.docID();
        if (pDoc < doc) {
          pDoc = de.advance(doc);
        }
      }


      if (doc == pDoc) {
        weights[i] = scorer.weight(contentLength, de.freq());
        de.nextPosition();
        pq.add(new OffsetsEnum(de, i));
      }
    }
    
    pq.add(new OffsetsEnum(EMPTY, Integer.MAX_VALUE)); // a sentinel for termination
    
    PriorityQueue<Passage> passageQueue = new PriorityQueue<Passage>(n, new Comparator<Passage>() {
      @Override
      public int compare(Passage left, Passage right) {
        if (left.score < right.score) {
          return -1;
        } else if (left.score > right.score) {
          return 1;
        } else {
          return left.startOffset - right.startOffset;
        }
      }
    });
    Passage current = new Passage();
    
    OffsetsEnum off;
    while ((off = pq.poll()) != null) {
      final DocsAndPositionsEnum dp = off.dp;
      int start = dp.startOffset();
      if (start == -1) {
        throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
      }
      int end = dp.endOffset();
      if (start >= current.endOffset) {
        if (current.startOffset >= 0) {
          // finalize current
          current.score *= scorer.norm(current.startOffset);
          // new sentence: first add 'current' to queue 
          if (passageQueue.size() == n && current.score < passageQueue.peek().score) {
            current.reset(); // can't compete, just reset it
          } else {
            passageQueue.offer(current);
            if (passageQueue.size() > n) {
              current = passageQueue.poll();
              current.reset();
            } else {
              current = new Passage();
            }
          }
        }
        // if we exceed limit, we are done
        if (start >= contentLength) {
          Passage passages[] = new Passage[passageQueue.size()];
          passageQueue.toArray(passages);
          for (Passage p : passages) {
            p.sort();
          }
          // sort in ascending order
          Arrays.sort(passages, new Comparator<Passage>() {
            @Override
            public int compare(Passage left, Passage right) {
              return left.startOffset - right.startOffset;
            }
          });
          return passages;
        }
        // advance breakiterator
        assert BreakIterator.DONE < 0;
        current.startOffset = Math.max(bi.preceding(start+1), 0);
        current.endOffset = Math.min(bi.next(), contentLength);
      }
      int tf = 0;
      while (true) {
        tf++;
        current.addMatch(start, end, terms[off.id]);
        if (off.pos == dp.freq()) {
          break; // removed from pq
        } else {
          off.pos++;
          dp.nextPosition();
          start = dp.startOffset();
          end = dp.endOffset();
        }
        if (start >= current.endOffset) {
          pq.offer(off);
          break;
        }

View Full Code Here

      return new SortingDocsEnum(docMap.size(), wrapReuse, inDocs, withFreqs, docMap);
    }


    @Override
    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, final int flags) throws IOException {
      final DocsAndPositionsEnum inReuse;
      final SortingDocsAndPositionsEnum wrapReuse;
      if (reuse != null && reuse instanceof SortingDocsAndPositionsEnum) {
        // if we're asked to reuse the given DocsEnum and it is Sorting, return
        // the wrapped one, since some Codecs expect it.
        wrapReuse = (SortingDocsAndPositionsEnum) reuse;
        inReuse = wrapReuse.getWrapped();
      } else {
        wrapReuse = null;
        inReuse = reuse;
      }


      final DocsAndPositionsEnum inDocsAndPositions = in.docsAndPositions(newToOld(liveDocs), inReuse, flags);
      if (inDocsAndPositions == null) {
        return null;
      }


      // we ignore the fact that offsets may be stored but not asked for,

View Full Code Here

      Terms tvs = tvFields.terms(field);
      assertNotNull(tvs);
      assertEquals(2, tvs.size());
      TermsEnum tvsEnum = tvs.iterator(null);
      assertEquals(new BytesRef("abc"), tvsEnum.next());
      final DocsAndPositionsEnum dpEnum = tvsEnum.docsAndPositions(null, null);
      if (field.equals("tv")) {
        assertNull(dpEnum);
      } else {
        assertNotNull(dpEnum);
      }

View Full Code Here

      return;
    }


    final CharsRef spare = new CharsRef();
    final TermsEnum termsEnum = vector.iterator(null);
    DocsAndPositionsEnum dpEnum = null;
    BytesRef text;
    
    int numDocs = reader.maxDoc();
    
    while ((text = termsEnum.next()) != null) {
      UnicodeUtil.UTF8toUTF16(text, spare);
      final String term = spare.toString();
      if (!termSet.contains(term)) {
        continue;
      }
      dpEnum = termsEnum.docsAndPositions(null, dpEnum);
      if (dpEnum == null) {
        // null snippet
        return;
      }


      dpEnum.nextDoc();
      
      // For weight look here: http://lucene.apache.org/core/3_6_0/api/core/org/apache/lucene/search/DefaultSimilarity.html
      final float weight = ( float ) ( Math.log( numDocs / ( double ) ( reader.docFreq( new Term(fieldName, text) ) + 1 ) ) + 1.0 );


      final int freq = dpEnum.freq();
      
      for(int i = 0;i < freq;i++) {
        int pos = dpEnum.nextPosition();
        if (dpEnum.startOffset() < 0) {
          return; // no offsets, null snippet
        }
        termList.add( new TermInfo( term, dpEnum.startOffset(), dpEnum.endOffset(), pos, weight ) );
      }
    }
    
    // sort by position
    Collections.sort(termList);

View Full Code Here

  }


  @Override
  public final int advance(int target) throws IOException {
    while (_queue.top() != null && target > _queue.top().docID()) {
      DocsAndPositionsEnum postings = _queue.pop();
      if (postings.advance(target) != NO_MORE_DOCS) {
        _queue.add(postings);
      }
    }
    return nextDoc();
  }

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.lucene.index.DocsAndPositionsEnum

com.browseengine.bobo.facets.data.MultiValueFacetDataCache$AllocOnlyLoader

com.browseengine.bobo.search.section.IntMetaDataCache

com.browseengine.bobo.sort.SortCollectorImpl

org.apache.blur.lucene.warmup.IndexTracer

org.apache.lucene.analysis.core.TestClassicAnalyzer

org.apache.lucene.analysis.sinks.TestTeeSinkTokenFilter

org.apache.lucene.analysis.TestCachingTokenFilter

org.apache.lucene.analysis.TestMockAnalyzer

org.apache.lucene.codecs.lucene41.TestBlockPostingsFormat3

org.apache.lucene.codecs.memory.DirectPostingsFormat$DirectField

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.