Package org.apache.lucene.util

Examples of org.apache.lucene.util.OpenBitSet


        System.out.println("  s2 ch=0x" + Integer.toHexString(s2.charAt(idx)));
      }
      */
    }

    final OpenBitSet isS1 = new OpenBitSet(NUM_DOCS);
    for(int idx=0;idx<NUM_DOCS;idx++) {
      if (random.nextBoolean()) {
        isS1.set(idx);
      }
    }

    final IndexReader r;
    if (true) {
      final IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
        .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
        .setMergePolicy(newLogMergePolicy());
      iwc.setRAMBufferSizeMB(16.0 + 16.0 * random.nextDouble());
      iwc.setMaxBufferedDocs(-1);
      final RandomIndexWriter riw = new RandomIndexWriter(random, dir, iwc);

      for(int idx=0;idx<NUM_DOCS;idx++) {
        final Document doc = new Document();
        String s = isS1.get(idx) ? s1 : s2;
        final Field f = newField("field", s, Field.Index.ANALYZED);
        final int count = _TestUtil.nextInt(random, 1, 4);
        for(int ct=0;ct<count;ct++) {
          doc.add(f);
        }
        riw.addDocument(doc);
      }

      r = riw.getReader();
      riw.close();
    } else {
      r = IndexReader.open(dir);
    }

    /*
    if (VERBOSE) {
      System.out.println("TEST: terms");
      TermEnum termEnum = r.terms();
      while(termEnum.next()) {
        System.out.println("  term=" + termEnum.term() + " len=" + termEnum.term().text().length());
        assertTrue(termEnum.docFreq() > 0);
        System.out.println("    s1?=" + (termEnum.term().text().equals(s1)) + " s1len=" + s1.length());
        System.out.println("    s2?=" + (termEnum.term().text().equals(s2)) + " s2len=" + s2.length());
        final String s = termEnum.term().text();
        for(int idx=0;idx<s.length();idx++) {
          System.out.println("      ch=0x" + Integer.toHexString(s.charAt(idx)));
        }
      }
    }
    */

    assertEquals(NUM_DOCS, r.numDocs());
    assertTrue(r.docFreq(new Term("field", s1)) > 0);
    assertTrue(r.docFreq(new Term("field", s2)) > 0);

    final byte[] payload = new byte[100];

    int num = atLeast(1000);
    for(int iter=0;iter<num;iter++) {

      final String term;
      final boolean doS1;
      if (random.nextBoolean()) {
        term = s1;
        doS1 = true;
      } else {
        term = s2;
        doS1 = false;
      }

      if (VERBOSE) {
        System.out.println("\nTEST: iter=" + iter + " doS1=" + doS1);
      }
       
      final TermPositions postings = r.termPositions(new Term("field", term));

      int docID = -1;
      while(docID < Integer.MAX_VALUE) {
        final int what = random.nextInt(3);
        if (what == 0) {
          if (VERBOSE) {
            System.out.println("TEST: docID=" + docID + "; do next()");
          }
          // nextDoc
          int expected = docID+1;
          while(true) {
            if (expected == NUM_DOCS) {
              expected = Integer.MAX_VALUE;
              break;
            } else if (isS1.get(expected) == doS1) {
              break;
            } else {
              expected++;
            }
          }
          boolean result = postings.next();
          if (!result) {
            assertEquals(Integer.MAX_VALUE, expected);
            if (VERBOSE) {
              System.out.println("  end");
            }
            break;
          } else {
            docID = postings.doc();
            if (VERBOSE) {
              System.out.println("  got docID=" + docID);
            }
            assertEquals(expected, docID);

            if (random.nextInt(6) == 3) {
              final int freq = postings.freq();
              assertTrue(freq >=1 && freq <= 4);
              for(int pos=0;pos<freq;pos++) {
                assertEquals(pos, postings.nextPosition());
                if (random.nextBoolean() && postings.isPayloadAvailable()) {
                  postings.getPayload(payload, 0);
                }
              }
            }
          }
        } else {
          // advance
          final int targetDocID;
          if (docID == -1) {
            targetDocID = random.nextInt(NUM_DOCS+1);
          } else {
            targetDocID = docID + _TestUtil.nextInt(random, 1, NUM_DOCS - docID);
          }
          if (VERBOSE) {
            System.out.println("TEST: docID=" + docID + "; do skipTo(" + targetDocID + ")");
          }
          int expected = targetDocID;
          while(true) {
            if (expected == NUM_DOCS) {
              expected = Integer.MAX_VALUE;
              break;
            } else if (isS1.get(expected) == doS1) {
              break;
            } else {
              expected++;
            }
          }
View Full Code Here


  }

  @Override
  public SpanFilterResult bitSpans(IndexReader reader) throws IOException {

    final OpenBitSet bits = new OpenBitSet(reader.maxDoc());
    Spans spans = query.getSpans(reader);
    List<SpanFilterResult.PositionInfo> tmp = new ArrayList<SpanFilterResult.PositionInfo>(20);
    int currentDoc = -1;
    SpanFilterResult.PositionInfo currentInfo = null;
    while (spans.next())
    {
      int doc = spans.doc();
      bits.set(doc);
      if (currentDoc != doc)
      {
        currentInfo = new SpanFilterResult.PositionInfo(doc);
        tmp.add(currentInfo);
        currentDoc = doc;
View Full Code Here

    assertDocIdSetCacheable(reader, FieldCacheRangeFilter.newIntRange("test", Integer.valueOf(10), Integer.valueOf(20), true, true), true);
    // a openbitset filter is always cacheable
    assertDocIdSetCacheable(reader, new Filter() {
      @Override
      public DocIdSet getDocIdSet(IndexReader reader) {
        return new OpenBitSet();
      }
    }, true);

    reader.close();
  }
View Full Code Here

    public SimpleDocIdSetFilter(int[] docs) {
      this.docs = docs;
    }
    @Override
    public DocIdSet getDocIdSet(IndexReader reader) {
      final OpenBitSet set = new OpenBitSet();
      final int limit = docBase+reader.maxDoc();
      for (;index < docs.length; index++) {
        final int docId = docs[index];
        if(docId > limit)
          break;
        set.set(docId-docBase);
      }
      docBase = limit;
      return set.isEmpty()?null:set;
    }
View Full Code Here

 
  /** Note: The neededBounds iterator must be unsigned (easier understanding what's happening) */
  private void assertLongRangeSplit(final long lower, final long upper, int precisionStep,
    final boolean useBitSet, final Iterator neededBounds, final Iterator neededShifts
  ) throws Exception {
    final OpenBitSet bits=useBitSet ? new OpenBitSet(upper-lower+1) : null;
   
    NumericUtils.splitLongRange(new NumericUtils.LongRangeBuilder() {
      //@Override
      public void addRange(long min, long max, int shift) {
        assertTrue("min, max should be inside bounds", min>=lower && min<=upper && max>=lower && max<=upper);
        if (useBitSet) for (long l=min; l<=max; l++) {
          assertFalse("ranges should not overlap", bits.getAndSet(l-lower) );
          // extra exit condition to prevent overflow on MAX_VALUE
          if (l == max) break;
        }
        if (neededBounds == null || neededShifts == null)
          return;
        // make unsigned longs for easier display and understanding
        min ^= 0x8000000000000000L;
        max ^= 0x8000000000000000L;
        //System.out.println("new Long(0x"+Long.toHexString(min>>>shift)+"L),new Long(0x"+Long.toHexString(max>>>shift)+"L)/*shift="+shift+"*/,");
        assertEquals( "shift", ((Integer)neededShifts.next()).intValue(), shift);
        assertEquals( "inner min bound", ((Long)neededBounds.next()).longValue(), min>>>shift);
        assertEquals( "inner max bound", ((Long)neededBounds.next()).longValue(), max>>>shift);
      }
    }, precisionStep, lower, upper);
   
    if (useBitSet) {
      // after flipping all bits in the range, the cardinality should be zero
      bits.flip(0,upper-lower+1);
      assertTrue("The sub-range concenated should match the whole range", bits.isEmpty());
    }
  }
View Full Code Here

  /** Note: The neededBounds iterator must be unsigned (easier understanding what's happening) */
  private void assertIntRangeSplit(final int lower, final int upper, int precisionStep,
    final boolean useBitSet, final Iterator neededBounds, final Iterator neededShifts
  ) throws Exception {
    final OpenBitSet bits=useBitSet ? new OpenBitSet(upper-lower+1) : null;
   
    NumericUtils.splitIntRange(new NumericUtils.IntRangeBuilder() {
      //@Override
      public void addRange(int min, int max, int shift) {
        assertTrue("min, max should be inside bounds", min>=lower && min<=upper && max>=lower && max<=upper);
        if (useBitSet) for (int i=min; i<=max; i++) {
          assertFalse("ranges should not overlap", bits.getAndSet(i-lower) );
          // extra exit condition to prevent overflow on MAX_VALUE
          if (i == max) break;
        }
        if (neededBounds == null)
          return;
        // make unsigned ints for easier display and understanding
        min ^= 0x80000000;
        max ^= 0x80000000;
        //System.out.println("new Integer(0x"+Integer.toHexString(min>>>shift)+"),new Integer(0x"+Integer.toHexString(max>>>shift)+")/*shift="+shift+"*/,");
        assertEquals( "shift", ((Integer)neededShifts.next()).intValue(), shift);
        assertEquals( "inner min bound", ((Integer)neededBounds.next()).intValue(), min>>>shift);
        assertEquals( "inner max bound", ((Integer)neededBounds.next()).intValue(), max>>>shift);
      }
    }, precisionStep, lower, upper);
   
    if (useBitSet) {
      // after flipping all bits in the range, the cardinality should be zero
      bits.flip(0,upper-lower+1);
      assertTrue("The sub-range concenated should match the whole range", bits.isEmpty());
    }
  }
View Full Code Here

        valueCount++;
      }
     
      int docCount = 0;
      long ordCount = 0;
      OpenBitSet seenOrds = new OpenBitSet(valueCount);
      Iterator<Number> ordIterator = ords.iterator();
      for (Number v : docToOrdCount) {
        assert v != null;
        int count = v.intValue();
        assert count >= 0;
        docCount++;
        ordCount += count;
       
        long lastOrd = -1;
        for (int i = 0; i < count; i++) {
          Number o = ordIterator.next();
          assert o != null;
          long ord = o.longValue();
          assert ord >= 0 && ord < valueCount;
          assert ord > lastOrd : "ord=" + ord + ",lastOrd=" + lastOrd;
          seenOrds.set(ord);
          lastOrd = ord;
        }
      }
      assert ordIterator.hasNext() == false;
     
      assert docCount == maxDoc;
      assert seenOrds.cardinality() == valueCount;
      checkIterator(values.iterator(), valueCount, false);
      checkIterator(docToOrdCount.iterator(), maxDoc, false);
      checkIterator(ords.iterator(), ordCount, false);
      in.addSortedSetField(field, values, docToOrdCount, ords);
    }
View Full Code Here

    }

    BigNestedIntArray nestedArray = new BigNestedIntArray();
    nestedArray.load(maxId, loader);

    OpenBitSet filter = new OpenBitSet(numVals);
    for (int i = 0; i < numVals; i++) {
      if (i % 2 == 0) {
        filter.set(i);
      }
    }

    for (int i = 0; i < maxId; i++) {
      nestedArray.countNoReturnWithFilter(i, count, filter);
View Full Code Here

    public void write(final FieldsConsumer consumer) throws Throwable {
      Arrays.sort(terms);
      final TermsConsumer termsConsumer = consumer.addField(fieldInfo);
      long sumTotalTermCount = 0;
      long sumDF = 0;
      OpenBitSet visitedDocs = new OpenBitSet();
      for (final TermData term : terms) {
        for (int i = 0; i < term.docs.length; i++) {
          visitedDocs.set(term.docs[i]);
        }
        sumDF += term.docs.length;
        sumTotalTermCount += term.write(termsConsumer);
      }
      termsConsumer.finish(omitTF ? -1 : sumTotalTermCount, sumDF, (int) visitedDocs.cardinality());
    }
View Full Code Here

    TermValueList<T> list = (listFactory == null ? (TermValueList<T>) new TermStringList()
        : listFactory.createTermList());
    IntArrayList minIDList = new IntArrayList();
    IntArrayList maxIDList = new IntArrayList();
    IntArrayList freqList = new IntArrayList();
    OpenBitSet bitset = new OpenBitSet(maxdoc + 1);
    int negativeValueCount = getNegativeValueCount(reader, field);
    int t = 1; // valid term id starts from 1
    list.add(null);
    minIDList.add(-1);
    maxIDList.add(-1);
    freqList.add(0);

    _overflow = false;

    String pre = null;

    int df = 0;
    int minID = -1;
    int maxID = -1;
    int docID = -1;
    int valId = 0;

    Terms terms = reader.terms(field);
    if (terms != null) {
      TermsEnum termsEnum = terms.iterator(null);
      BytesRef text;
      while ((text = termsEnum.next()) != null) {
        String strText = text.utf8ToString();
        String val = null;
        int weight = 0;
        String[] split = strText.split("\u0000");
        if (split.length > 1) {
          val = split[0];
          weight = Integer.parseInt(split[split.length - 1]);
        } else {
          continue;
        }

        if (pre == null || !val.equals(pre)) {
          if (pre != null) {
            freqList.add(df);
            minIDList.add(minID);
            maxIDList.add(maxID);
          }
          list.add(val);
          df = 0;
          minID = -1;
          maxID = -1;
          valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
          t++;
        }

        Term term = new Term(field, strText);
        DocsEnum docsEnum = reader.termDocsEnum(term);
        if (docsEnum != null) {
          while ((docID = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
            df++;

            if (!loader.add(docID, valId)) {
              logOverflow(fieldName);
            } else {
              weightLoader.add(docID, weight);
            }

            if (docID < minID) minID = docID;
            bitset.fastSet(docID);
            while (docsEnum.nextDoc() != DocsEnum.NO_MORE_DOCS) {
              docID = docsEnum.docID();
              df++;
              if (!loader.add(docID, valId)) {
                logOverflow(fieldName);
              } else {
                weightLoader.add(docID, weight);
              }
              bitset.fastSet(docID);
            }
            if (docID > maxID) maxID = docID;
          }
        }
        pre = val;
      }
      if (pre != null) {
        freqList.add(df);
        minIDList.add(minID);
        maxIDList.add(maxID);
      }
    }

    list.seal();
    // Process minIDList and maxIDList for negative number
    for (int i = 1; i < negativeValueCount/2 + 1; ++i) {
      int top = i;
      int tail = negativeValueCount - i + 1;
      int topValue = minIDList.getInt(top);
      int tailValue = minIDList.getInt(tail);
      minIDList.set(top, tailValue);
      minIDList.set(tail, topValue);
      topValue = maxIDList.getInt(top);
      tailValue = maxIDList.getInt(tail);
      maxIDList.set(top, tailValue);
      maxIDList.set(tail, topValue);
    }

    try {
      _nestedArray.load(maxdoc + 1, loader);
      _weightArray.load(maxdoc + 1, weightLoader);
    } catch (IOException e) {
      throw e;
    } catch (Exception e) {
      throw new RuntimeException("failed to load due to " + e.toString(), e);
    }

    this.valArray = list;
    this.freqs = freqList.toIntArray();
    this.minIDs = minIDList.toIntArray();
    this.maxIDs = maxIDList.toIntArray();

    int doc = 0;
    while (doc < maxdoc && !_nestedArray.contains(doc, 0, true)) {
      ++doc;
    }
    if (doc < maxdoc) {
      this.minIDs[0] = doc;
      doc = maxdoc - 1;
      while (doc >= 0 && !_nestedArray.contains(doc, 0, true)) {
        --doc;
      }
      this.maxIDs[0] = doc;
    }
    this.freqs[0] = maxdoc - (int) bitset.cardinality();
  }
View Full Code Here

TOP

Related Classes of org.apache.lucene.util.OpenBitSet

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.