Examples of OpenBitSet


Examples of org.apache.lucene.util.OpenBitSet

   * this method is not synced.
   */
  public static OpenBitSet getPrimeDocBitSet(Term primeDocTerm, IndexReader reader) throws IOException {
    Object key = reader.getCoreCacheKey();
    final Map<Object, OpenBitSet> primeDocMap = getPrimeDocMap(primeDocTerm);
    OpenBitSet bitSet = primeDocMap.get(key);
    if (bitSet == null) {
      reader.addReaderClosedListener(new ReaderClosedListener() {
        @Override
        public void onClose(IndexReader reader) {
          Object key = reader.getCoreCacheKey();
          LOG.debug("Current size [" + primeDocMap.size() + "] Prime Doc BitSet removing for segment [" + reader + "]");
          primeDocMap.remove(key);
        }
      });
      LOG.debug("Prime Doc BitSet missing for segment [" + reader + "] current size [" + primeDocMap.size() + "]");
      final OpenBitSet bs = new OpenBitSet(reader.maxDoc());
      primeDocMap.put(key, bs);
      IndexSearcher searcher = new IndexSearcher(reader);
      searcher.search(new TermQuery(primeDocTerm), new Collector() {

        @Override
        public void setScorer(Scorer scorer) throws IOException {

        }

        @Override
        public void setNextReader(AtomicReaderContext atomicReaderContext) throws IOException {
        }

        @Override
        public void collect(int doc) throws IOException {
          bs.set(doc);
        }

        @Override
        public boolean acceptsDocsOutOfOrder() {
          return false;
View Full Code Here

Examples of org.apache.lucene.util.OpenBitSet

    OpenBitSet dels;
    OpenBitSet oldDels = null;

    public FakeDeleteIndexReader(IndexReader in) {
      super(in);
      dels = new OpenBitSet(in.maxDoc());
      if (in.hasDeletions()) {
        oldDels = new OpenBitSet(in.maxDoc());
        for (int i = 0; i < in.maxDoc(); i++) {
          if (in.isDeleted(i)) oldDels.set(i);
        }
        dels.or(oldDels);
      }
View Full Code Here

Examples of org.apache.lucene.util.OpenBitSet

     * Just removes our overlaid deletions - does not undelete the original
     * deletions.
     */
    @Override
    protected void doUndeleteAll() throws CorruptIndexException, IOException {
      dels = new OpenBitSet(in.maxDoc());
      if (oldDels != null) {
        dels.or(oldDels);
      }
    }
View Full Code Here

Examples of org.apache.lucene.util.OpenBitSet

  public static final class SimpleDocIdSetFilter extends Filter {
    private OpenBitSet bits;

    public SimpleDocIdSetFilter(int[] docs) {
      bits = new OpenBitSet();
      for(int i = 0; i < docs.length; i++){
        bits.set(docs[i]);
      }
     
    }
View Full Code Here

Examples of org.apache.lucene.util.OpenBitSet

    // is cacheable:
    assertDocIdSetCacheable(reader, FieldCacheRangeFilter.newIntRange("test", new Integer(10), new Integer(20), true, true), true);
    // an OpenBitSet filter is always cacheable
    assertDocIdSetCacheable(reader, new Filter() {
      public DocIdSet getDocIdSet(IndexReader reader) {
        return new OpenBitSet();
      }
    }, true);
    // a deprecated filter is always cacheable
    assertDocIdSetCacheable(reader, new Filter() {
      public BitSet bits(IndexReader reader) {
View Full Code Here

Examples of org.apache.lucene.util.OpenBitSet

    assertDocIdSetCacheable(reader, FieldCacheRangeFilter.newIntRange("test", Integer.valueOf(10), Integer.valueOf(20), true, true), true);
    // an OpenBitSet filter is always cacheable
    assertDocIdSetCacheable(reader, new Filter() {
      @Override
      public DocIdSet getDocIdSet(IndexReader reader) {
        return new OpenBitSet();
      }
    }, true);

    reader.close();
    dir.close();
View Full Code Here

Examples of org.apache.lucene.util.OpenBitSet

      }
    }

    @Override
    public DocIdSet getDocIdSet(IndexReader reader) {
      final OpenBitSet set = new OpenBitSet(reader.maxDoc());
      final int docBase = docBasePerSub.get(reader);
      final int limit = docBase+reader.maxDoc();
      for (;index < docs.length; index++) {
        final int docId = docs[index];
        if (docId > limit)
          break;
        if (docId >= docBase) {
          set.set(docId-docBase);
        }
      }
      return set.isEmpty()?null:set;
    }
View Full Code Here

Examples of org.apache.lucene.util.OpenBitSet

    Set<String> idSet = new HashSet<String>();
    idSet.addAll(ids);
   
    int numDocs = reader.numDocs();
   
    OpenBitSet clusterDocBitset = getClusterDocBitset(reader, idSet, this.idField);
   
    log.info("Populating term infos from the index");
   
    /**
     * This code is the same as that of CachedTermInfo, with one major change: how the document frequency is obtained.
     *
     * Since we have deleted the documents out of the cluster, the document frequency for a term should only
     * include the in-cluster documents. The document frequency obtained from TermEnum reflects the frequency
     * in the entire index. To get the in-cluster frequency, we need to query the index to get the term
     * frequencies in each document. The number of results of this call will be the in-cluster document
     * frequency.
     */
   
    TermEnum te = reader.terms(new Term(contentField, ""));
    int count = 0;
   
    Map<String,TermEntry> termEntryMap = new LinkedHashMap<String,TermEntry>();
    do {
      Term term = te.term();
      if (term == null || term.field().equals(contentField) == false) {
        break;
      }
      OpenBitSet termBitset = new OpenBitSet(reader.maxDoc());
     
      // Generate bitset for the term
      TermDocs termDocs = reader.termDocs(term);
     
      while (termDocs.next()) {
        termBitset.set(termDocs.doc());
      }
     
      // AND the term's bitset with cluster doc bitset to get the term's in-cluster frequency.
      // This modifies the termBitset, but that's fine as we are not using it anywhere else.
      termBitset.and(clusterDocBitset);
      int inclusterDF = (int) termBitset.cardinality();
     
      TermEntry entry = new TermEntry(term.text(), count++, inclusterDF);
      termEntryMap.put(entry.term, entry);
    } while (te.next());
    te.close();
View Full Code Here

Examples of org.apache.lucene.util.OpenBitSet

  private static OpenBitSet getClusterDocBitset(IndexReader reader,
                                                Set<String> idSet,
                                                String idField) throws IOException {
    int numDocs = reader.numDocs();
   
    OpenBitSet bitset = new OpenBitSet(numDocs);
   
    FieldSelector idFieldSelector = new SetBasedFieldSelector(Collections.singleton(idField), Collections
        .emptySet());
   
    for (int i = 0; i < numDocs; i++) {
      String id = null;
      // Use Lucene's internal ID if idField is not specified. Else, get it from the document.
      if (idField == null) {
        id = Integer.toString(i);
      } else {
        id = reader.document(i, idFieldSelector).get(idField);
      }
      if (idSet.contains(id)) {
        bitset.set(i);
      }
    }
    log.info("Created bitset for in-cluster documents : {}", bitset.cardinality());
    return bitset;
  }
View Full Code Here

Examples of org.apache.lucene.util.OpenBitSet

    docIdBitSet = makeDocIdSetOnAgreedBits( iterators ); // before returning hold a copy as cache
    return docIdBitSet;
  }

  private DocIdSet makeDocIdSetOnAgreedBits(final DocIdSetIterator[] iterators) throws IOException {
    final OpenBitSet result = new OpenBitSet( maxDocNumber );
    final int numberOfIterators = iterators.length;

    int targetPosition = findFirstTargetPosition( iterators, result );

    if ( targetPosition == DocIdSetIterator.NO_MORE_DOCS ) {
      return DocIdSet.EMPTY_DOCIDSET;
    }

    // Each iterator can vote "ok" for the current target to
    // be reached; when all agree the bit is set.
    // if an iterator disagrees (it jumped further), its current position becomes the new targetPosition
    // for the others and he is considered "first" in the voting round (every iterator votes for himself ;-)

    int i = 0;
    int votes = 0; //could be smarter but would make the code even more complex for a minor optimization out of cycle.
    // enter main loop:
    while ( true ) {
      final DocIdSetIterator iterator = iterators[i];
      int position = targetPosition;
      if ( !iteratorAlreadyOnTargetPosition( targetPosition, iterator ) ) {
        position = iterator.advance( targetPosition );
      }
      if ( position == DocIdSetIterator.NO_MORE_DOCS ) {
        return result;
      } //exit condition
      if ( position == targetPosition ) {
        if ( ++votes == numberOfIterators ) {
          result.fastSet( position );
          votes = 0;
          targetPosition++;
        }
      }
      else {
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.