Package org.apache.lucene.util

Examples of org.apache.lucene.util.OpenBitSet


  public static final class SimpleDocIdSetFilter extends Filter {
    private OpenBitSet bits;

    public SimpleDocIdSetFilter(int[] docs) {
      bits = new OpenBitSet();
      for(int i = 0; i < docs.length; i++){
        bits.set(docs[i]);
      }
     
    }
View Full Code Here


    // is cacheable:
    assertDocIdSetCacheable(reader, FieldCacheRangeFilter.newIntRange("test", new Integer(10), new Integer(20), true, true), true);
    // an OpenBitSet filter is always cacheable
    assertDocIdSetCacheable(reader, new Filter() {
      public DocIdSet getDocIdSet(IndexReader reader) {
        return new OpenBitSet();
      }
    }, true);
    // a deprecated filter is always cacheable
    assertDocIdSetCacheable(reader, new Filter() {
      public BitSet bits(IndexReader reader) {
View Full Code Here

    assertDocIdSetCacheable(reader, FieldCacheRangeFilter.newIntRange("test", Integer.valueOf(10), Integer.valueOf(20), true, true), true);
    // an OpenBitSet filter is always cacheable
    assertDocIdSetCacheable(reader, new Filter() {
      @Override
      public DocIdSet getDocIdSet(IndexReader reader) {
        return new OpenBitSet();
      }
    }, true);

    reader.close();
    dir.close();
View Full Code Here

      }
    }

    @Override
    public DocIdSet getDocIdSet(IndexReader reader) {
      final OpenBitSet set = new OpenBitSet(reader.maxDoc());
      final int docBase = docBasePerSub.get(reader);
      final int limit = docBase+reader.maxDoc();
      for (;index < docs.length; index++) {
        final int docId = docs[index];
        if (docId > limit)
          break;
        if (docId >= docBase) {
          set.set(docId-docBase);
        }
      }
      return set.isEmpty()?null:set;
    }
View Full Code Here

    Set<String> idSet = new HashSet<String>();
    idSet.addAll(ids);
   
    int numDocs = reader.numDocs();
   
    OpenBitSet clusterDocBitset = getClusterDocBitset(reader, idSet, this.idField);
   
    log.info("Populating term infos from the index");
   
    /**
     * This code is the same as that of CachedTermInfo, with one major change: how the document frequency is obtained.
     *
     * Since we have deleted the documents out of the cluster, the document frequency for a term should only
     * include the in-cluster documents. The document frequency obtained from TermEnum reflects the frequency
     * in the entire index. To get the in-cluster frequency, we need to query the index to get the term
     * frequencies in each document. The number of results of this call will be the in-cluster document
     * frequency.
     */
   
    TermEnum te = reader.terms(new Term(contentField, ""));
    int count = 0;
   
    Map<String,TermEntry> termEntryMap = new LinkedHashMap<String,TermEntry>();
    do {
      Term term = te.term();
      if (term == null || term.field().equals(contentField) == false) {
        break;
      }
      OpenBitSet termBitset = new OpenBitSet(reader.maxDoc());
     
      // Generate bitset for the term
      TermDocs termDocs = reader.termDocs(term);
     
      while (termDocs.next()) {
        termBitset.set(termDocs.doc());
      }
     
      // AND the term's bitset with cluster doc bitset to get the term's in-cluster frequency.
      // This modifies the termBitset, but that's fine as we are not using it anywhere else.
      termBitset.and(clusterDocBitset);
      int inclusterDF = (int) termBitset.cardinality();
     
      TermEntry entry = new TermEntry(term.text(), count++, inclusterDF);
      termEntryMap.put(entry.term, entry);
    } while (te.next());
    te.close();
View Full Code Here

  private static OpenBitSet getClusterDocBitset(IndexReader reader,
                                                Set<String> idSet,
                                                String idField) throws IOException {
    int numDocs = reader.numDocs();
   
    OpenBitSet bitset = new OpenBitSet(numDocs);
   
    FieldSelector idFieldSelector = new SetBasedFieldSelector(Collections.singleton(idField), Collections
        .emptySet());
   
    for (int i = 0; i < numDocs; i++) {
      String id = null;
      // Use Lucene's internal ID if idField is not specified. Else, get it from the document.
      if (idField == null) {
        id = Integer.toString(i);
      } else {
        id = reader.document(i, idFieldSelector).get(idField);
      }
      if (idSet.contains(id)) {
        bitset.set(i);
      }
    }
    log.info("Created bitset for in-cluster documents : {}", bitset.cardinality());
    return bitset;
  }
View Full Code Here

    docIdBitSet = makeDocIdSetOnAgreedBits( iterators ); // before returning hold a copy as cache
    return docIdBitSet;
  }

  private DocIdSet makeDocIdSetOnAgreedBits(final DocIdSetIterator[] iterators) throws IOException {
    final OpenBitSet result = new OpenBitSet( maxDocNumber );
    final int numberOfIterators = iterators.length;

    int targetPosition = findFirstTargetPosition( iterators, result );

    if ( targetPosition == DocIdSetIterator.NO_MORE_DOCS ) {
      return DocIdSet.EMPTY_DOCIDSET;
    }

    // Each iterator can vote "ok" for the current target to
    // be reached; when all agree the bit is set.
    // If an iterator disagrees (it jumped further), its current position becomes the new targetPosition
    // for the others, and it is considered "first" in the voting round (every iterator votes for itself ;-)

    int i = 0;
    int votes = 0; //could be smarter but would make the code even more complex for a minor optimization out of cycle.
    // enter main loop:
    while ( true ) {
      final DocIdSetIterator iterator = iterators[i];
      int position = targetPosition;
      if ( !iteratorAlreadyOnTargetPosition( targetPosition, iterator ) ) {
        position = iterator.advance( targetPosition );
      }
      if ( position == DocIdSetIterator.NO_MORE_DOCS ) {
        return result;
      } //exit condition
      if ( position == targetPosition ) {
        if ( ++votes == numberOfIterators ) {
          result.fastSet( position );
          votes = 0;
          targetPosition++;
        }
      }
      else {
View Full Code Here

      SortedDocValues dv = dvs[sub];
      Bits liveDocs = reader.getLiveDocs();
      if (liveDocs == null) {
        liveTerms[sub] = dv.termsEnum();
      } else {
        OpenBitSet bitset = new OpenBitSet(dv.getValueCount());
        for (int i = 0; i < reader.maxDoc(); i++) {
          if (liveDocs.get(i)) {
            int ord = dv.getOrd(i);
            if (ord >= 0) {
              bitset.set(ord);
            }
          }
        }
        liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset);
      }
View Full Code Here

      SortedSetDocValues dv = dvs[sub];
      Bits liveDocs = reader.getLiveDocs();
      if (liveDocs == null) {
        liveTerms[sub] = dv.termsEnum();
      } else {
        OpenBitSet bitset = new OpenBitSet(dv.getValueCount());
        for (int i = 0; i < reader.maxDoc(); i++) {
          if (liveDocs.get(i)) {
            dv.setDocument(i);
            long ord;
            while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
              bitset.set(ord);
            }
          }
        }
        liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset);
      }
View Full Code Here

  private boolean advanceRpts(PhrasePositions pp) throws IOException {
    if (pp.rptGroup < 0) {
      return true; // not a repeater
    }
    PhrasePositions[] rg = rptGroups[pp.rptGroup];
    OpenBitSet bits = new OpenBitSet(rg.length); // for re-queuing after collisions are resolved
    int k0 = pp.rptInd;
    int k;
    while((k=collide(pp)) >= 0) {
      pp = lesser(pp, rg[k]); // always advance the lesser of the (only) two colliding pps
      if (!advancePP(pp)) {
        return false; // exhausted
      }
      if (k != k0) { // careful: mark only those currently in the queue
        bits.set(k); // mark that pp2 need to be re-queued
      }
    }
    // collisions resolved, now re-queue
    // empty (partially) the queue until seeing all pps advanced for resolving collisions
    int n = 0;
    while (bits.cardinality() > 0) {
      PhrasePositions pp2 = pq.pop();
      rptStack[n++] = pp2;
      if (pp2.rptGroup >= 0 && bits.get(pp2.rptInd)) {
        bits.clear(pp2.rptInd);
      }
    }
    // add back to queue
    for (int i=n-1; i>=0; i--) {
      pq.add(rptStack[i]);
View Full Code Here

TOP

Related Classes of org.apache.lucene.util.OpenBitSet

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.