Package org.apache.lucene.index

Examples of org.apache.lucene.index.TermsEnum
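
All of the excerpts below target the Lucene 4.x API. As a baseline, here is a minimal, self-contained sketch of the loop they all build on; the class name, the Directory, and the field argument are placeholders, not taken from any of the examples:

  import java.io.IOException;

  import org.apache.lucene.index.DirectoryReader;
  import org.apache.lucene.index.MultiFields;
  import org.apache.lucene.index.Terms;
  import org.apache.lucene.index.TermsEnum;
  import org.apache.lucene.store.Directory;
  import org.apache.lucene.util.BytesRef;

  public class TermsEnumBasics {
    /** Prints every term of the given field together with its document frequency. */
    static void dumpTerms(Directory dir, String field) throws IOException {
      DirectoryReader reader = DirectoryReader.open(dir);
      try {
        Terms terms = MultiFields.getTerms(reader, field); // null if no document has the field
        if (terms != null) {
          TermsEnum te = terms.iterator(null); // pass an old enum instead of null to reuse it
          BytesRef term;
          while ((term = te.next()) != null) {
            System.out.println(term.utf8ToString() + " docFreq=" + te.docFreq());
          }
        }
      } finally {
        reader.close();
      }
    }
  }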


Duelling two Terms instances in a test: statistics, full enumeration, seeking, and random regexp intersections must all agree:

    assertTermsStatistics(leftTerms, rightTerms);
   
    // NOTE: we don't assert hasOffsets/hasPositions/hasPayloads because they are allowed to be different

    TermsEnum leftTermsEnum = leftTerms.iterator(null);
    TermsEnum rightTermsEnum = rightTerms.iterator(null);
    assertTermsEnum(leftTermsEnum, rightTermsEnum, true);
   
    assertTermsSeeking(leftTerms, rightTerms);
   
    if (deep) {
      int numIntersections = atLeast(3);
      for (int i = 0; i < numIntersections; i++) {
        String re = AutomatonTestUtil.randomRegexp(random());
        CompiledAutomaton automaton = new CompiledAutomaton(new RegExp(re, RegExp.NONE).toAutomaton());
        if (automaton.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
          // TODO: test start term too
          TermsEnum leftIntersection = leftTerms.intersect(automaton, null);
          TermsEnum rightIntersection = rightTerms.intersect(automaton, null);
          assertTermsEnum(leftIntersection, rightIntersection, rarely());
        }
      }
    }
  }
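
The intersect() call above enumerates only the terms accepted by a compiled automaton. A standalone sketch of the same idea outside the test harness; the class name and the "fo.*" pattern are illustrative:

  import java.io.IOException;

  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.index.MultiFields;
  import org.apache.lucene.index.Terms;
  import org.apache.lucene.index.TermsEnum;
  import org.apache.lucene.util.BytesRef;
  import org.apache.lucene.util.automaton.CompiledAutomaton;
  import org.apache.lucene.util.automaton.RegExp;

  public class IntersectExample {
    /** Prints every term of the field matching the regular expression "fo.*". */
    static void printMatches(IndexReader reader, String field) throws IOException {
      Terms terms = MultiFields.getTerms(reader, field);
      if (terms == null) {
        return;
      }
      CompiledAutomaton automaton = new CompiledAutomaton(new RegExp("fo.*", RegExp.NONE).toAutomaton());
      // intersect() is only defined for NORMAL automata; single-term and
      // match-all cases have cheaper dedicated paths
      if (automaton.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
        TermsEnum te = terms.intersect(automaton, null); // null: no lower-bound start term
        BytesRef term;
        while ((term = te.next()) != null) {
          System.out.println(term.utf8ToString());
        }
      }
    }
  }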

Randomly sampling terms to seek on (exact copies, truncated prefixes, and copies at a non-zero offset), then checking that seekExact and seekCeil behave identically on two equivalent term dictionaries:

  private void assertTermsSeeking(Terms leftTerms, Terms rightTerms) throws Exception {
    TermsEnum leftEnum = null;
    TermsEnum rightEnum = null;
   
    // just an upper bound
    int numTests = atLeast(20);
    Random random = random();
   
    // collect this number of terms from the left side
    HashSet<BytesRef> tests = new HashSet<BytesRef>();
    int numPasses = 0;
    while (numPasses < 10 && tests.size() < numTests) {
      leftEnum = leftTerms.iterator(leftEnum);
      BytesRef term = null;
      while ((term = leftEnum.next()) != null) {
        int code = random.nextInt(10);
        if (code == 0) {
          // the term
          tests.add(BytesRef.deepCopyOf(term));
        } else if (code == 1) {
          // truncated subsequence of term
          term = BytesRef.deepCopyOf(term);
          if (term.length > 0) {
            // truncate it
            term.length = random.nextInt(term.length);
          }
          tests.add(term); // keep the truncated copy as a test term
        } else if (code == 2) {
          // term, but ensure a non-zero offset
          byte newbytes[] = new byte[term.length+5];
          System.arraycopy(term.bytes, term.offset, newbytes, 5, term.length);
          tests.add(new BytesRef(newbytes, 5, term.length));
        }
      }
      numPasses++;
    }
   
    ArrayList<BytesRef> shuffledTests = new ArrayList<BytesRef>(tests);
    Collections.shuffle(shuffledTests, random);
   
    for (BytesRef b : shuffledTests) {
      leftEnum = leftTerms.iterator(leftEnum);
      rightEnum = rightTerms.iterator(rightEnum);
     
      // seek twice in a row: the second call seeks from an already-positioned enum
      assertEquals(leftEnum.seekExact(b), rightEnum.seekExact(b));
      assertEquals(leftEnum.seekExact(b), rightEnum.seekExact(b));
     
      SeekStatus leftStatus;
      SeekStatus rightStatus;
     
      leftStatus = leftEnum.seekCeil(b);
      rightStatus = rightEnum.seekCeil(b);
      assertEquals(leftStatus, rightStatus);
      if (leftStatus != SeekStatus.END) {
        assertEquals(leftEnum.term(), rightEnum.term());
      }
     
      // and again from the already-positioned enum
      leftStatus = leftEnum.seekCeil(b);
      rightStatus = rightEnum.seekCeil(b);
      assertEquals(leftStatus, rightStatus);
      if (leftStatus != SeekStatus.END) {
        assertEquals(leftEnum.term(), rightEnum.term());
      }
    }
  }
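
For reference, the two seek flavors exercised by this test differ in what they promise: seekExact only reports membership and leaves the enum unpositioned on a miss, while seekCeil always positions the enum unless the target sorts past the last term. A small sketch (class and method names are illustrative):

  import java.io.IOException;

  import org.apache.lucene.index.TermsEnum;
  import org.apache.lucene.index.TermsEnum.SeekStatus;
  import org.apache.lucene.util.BytesRef;

  public class SeekExample {
    static void seekBoth(TermsEnum te, BytesRef target) throws IOException {
      // seekExact: cheapest membership test; if it returns false the enum
      // is left unpositioned and term() must not be called
      if (te.seekExact(target)) {
        System.out.println("exact: " + te.term().utf8ToString());
      }

      // seekCeil also positions the enum on a miss
      SeekStatus status = te.seekCeil(target);
      if (status == SeekStatus.FOUND) {
        System.out.println("found: " + te.term().utf8ToString());
      } else if (status == SeekStatus.NOT_FOUND) {
        // positioned on the smallest term greater than the target
        System.out.println("ceiling: " + te.term().utf8ToString());
      }
      // SeekStatus.END: target sorts after every term; the enum is exhausted
    }
  }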

Verifying enum reuse: iterate every term of field "foo", passing the previous enum back in, and count distinct instances with an IdentityHashMap. Here the codec is expected to hand back two distinct DocsEnum (and DocsAndPositionsEnum) instances in total:

    iw.close();
   
    AtomicReader segment = getOnlySegmentReader(ir);
    DocsEnum reuse = null;
    Map<DocsEnum,Boolean> allEnums = new IdentityHashMap<DocsEnum,Boolean>();
    TermsEnum te = segment.terms("foo").iterator(null);
    while (te.next() != null) {
      reuse = te.docs(null, reuse, DocsEnum.FLAG_NONE);
      allEnums.put(reuse, true);
    }
   
    assertEquals(2, allEnums.size());
   
    allEnums.clear();
    DocsAndPositionsEnum posReuse = null;
    te = segment.terms("foo").iterator(null);
    while (te.next() != null) {
      posReuse = te.docsAndPositions(null, posReuse);
      allEnums.put(posReuse, true);
    }
   
    assertEquals(2, allEnums.size());
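
The reuse idiom these assertions verify: pass the previous enum back into docs() and the codec may recycle it instead of allocating a new one. A sketch of the same pattern in application code; the class name and counting logic are illustrative:

  import java.io.IOException;

  import org.apache.lucene.index.AtomicReader;
  import org.apache.lucene.index.DocsEnum;
  import org.apache.lucene.index.Terms;
  import org.apache.lucene.index.TermsEnum;
  import org.apache.lucene.search.DocIdSetIterator;
  import org.apache.lucene.util.BytesRef;

  public class ReuseExample {
    /** Counts all live postings of a field, reusing one DocsEnum across terms. */
    static long countPostings(AtomicReader reader, String field) throws IOException {
      Terms terms = reader.terms(field);
      if (terms == null) {
        return 0;
      }
      TermsEnum te = terms.iterator(null);
      DocsEnum docs = null; // handed back into docs() on every term
      long total = 0;
      BytesRef term;
      while ((term = te.next()) != null) {
        docs = te.docs(reader.getLiveDocs(), docs, DocsEnum.FLAG_NONE);
        while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          total++;
        }
      }
      return total;
    }
  }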
   

The same reuse check against a different index and codec setup, where four distinct instances are expected:

    iw.close();
   
    AtomicReader segment = getOnlySegmentReader(ir);
    DocsEnum reuse = null;
    Map<DocsEnum,Boolean> allEnums = new IdentityHashMap<DocsEnum,Boolean>();
    TermsEnum te = segment.terms("foo").iterator(null);
    while (te.next() != null) {
      reuse = te.docs(null, reuse, DocsEnum.FLAG_NONE);
      allEnums.put(reuse, true);
    }
   
    assertEquals(4, allEnums.size());
   
    allEnums.clear();
    DocsAndPositionsEnum posReuse = null;
    te = segment.terms("foo").iterator(null);
    while (te.next() != null) {
      posReuse = te.docsAndPositions(null, posReuse);
      allEnums.put(posReuse, true);
    }
   
    assertEquals(4, allEnums.size());
   

Deriving stop words from the index itself: any term whose document frequency exceeds maxDocFreq is added to the stop-word set:

    for (String field : fields) {
      Set<String> stopWords = new HashSet<String>();
      Terms terms = MultiFields.getTerms(indexReader, field);
      CharsRef spare = new CharsRef();
      if (terms != null) {
        TermsEnum te = terms.iterator(null);
        BytesRef text;
        while ((text = te.next()) != null) {
          if (te.docFreq() > maxDocFreq) {
            UnicodeUtil.UTF8toUTF16(text, spare);
            stopWords.add(spare.toString());
          }
        }
      }

Counting live documents per term and mapping each term onto a faceting CategoryPath:

      Terms terms = MultiFields.getTerms(indexReader, clp.field);
      if (terms == null) {
        continue;
      }
      Bits liveDocs = MultiFields.getLiveDocs(indexReader);
      TermsEnum te = terms.iterator(null);
      DocsEnum de = null;
      while (te.next() != null) {
        de = _TestUtil.docs(random(), te, liveDocs, de, DocsEnum.FLAG_NONE);
        int cnt = 0;
        while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          cnt++;
        }
        res.put(new CategoryPath(te.term().utf8ToString().split(delim)), cnt);
      }
    }
    return res;
  }
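
A side note on the counting loop above: when the reader holds no deletions, walking the postings is unnecessary because docFreq() already reports the per-term document count. A sketch reusing the excerpt's indexReader, te, and liveDocs variables:

  int cnt;
  if (!indexReader.hasDeletions()) {
    // no deletions: every document counted by docFreq() is live
    cnt = te.docFreq();
  } else {
    cnt = 0;
    DocsEnum de = te.docs(liveDocs, null, DocsEnum.FLAG_NONE);
    while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      cnt++;
    }
  }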

Collecting a TermContext (document frequency, total term frequency, and per-segment TermState) for each query term across all index leaves:

  public void collectTermContext(IndexReader reader,
      List<AtomicReaderContext> leaves, TermContext[] contextArray,
      Term[] queryTerms) throws IOException {
    TermsEnum termsEnum = null;
    for (AtomicReaderContext context : leaves) {
      final Fields fields = context.reader().fields();
      if (fields == null) {
        // reader has no fields
        continue;
      }
      for (int i = 0; i < queryTerms.length; i++) {
        Term term = queryTerms[i];
        TermContext termContext = contextArray[i];
        final Terms terms = fields.terms(term.field());
        if (terms == null) {
          // field does not exist
          continue;
        }
        termsEnum = terms.iterator(termsEnum);
        assert termsEnum != null;
       
        if (termsEnum == TermsEnum.EMPTY) continue;
        if (termsEnum.seekExact(term.bytes())) {
          if (termContext == null) {
            contextArray[i] = new TermContext(reader.getContext(),
                termsEnum.termState(), context.ord, termsEnum.docFreq(),
                termsEnum.totalTermFreq());
          } else {
            termContext.register(termsEnum.termState(), context.ord,
                termsEnum.docFreq(), termsEnum.totalTermFreq());
          }
         
        }
       
      }
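
For a single term, TermContext can do this per-leaf loop itself. A sketch; note that in some 4.x minor versions build() takes an extra useCache boolean:

  import org.apache.lucene.index.Term;
  import org.apache.lucene.index.TermContext;

  // roughly the same per-leaf seekExact/register loop, done internally:
  TermContext context = TermContext.build(reader.getContext(), term);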

Looking up join values on the "to" side of a join: seekExact positions the enum, then the postings are walked to pair each matching document with its join score:

        if (scoreDocsInOrder) {
          AtomicReader slowCompositeReader = SlowCompositeReaderWrapper.wrap(toSearcher.getIndexReader());
          Terms terms = slowCompositeReader.terms(toField);
          if (terms != null) {
            DocsEnum docsEnum = null;
            TermsEnum termsEnum = null;
            SortedSet<BytesRef> joinValues = new TreeSet<BytesRef>(BytesRef.getUTF8SortedAsUnicodeComparator());
            joinValues.addAll(joinValueToJoinScores.keySet());
            for (BytesRef joinValue : joinValues) {
              termsEnum = terms.iterator(termsEnum);
              if (termsEnum.seekExact(joinValue)) {
                docsEnum = termsEnum.docs(slowCompositeReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
                JoinScore joinScore = joinValueToJoinScores.get(joinValue);

                for (int doc = docsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.nextDoc()) {
                  // First encountered join value determines the score.
                  // Something to keep in mind for many-to-many relations.

Rebuilding a TokenStream from a term vector: each term's positions, and optionally offsets and payloads, are replayed through the attribute API:

    positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);
    offsetAttribute = addAttribute(OffsetAttribute.class);
    payloadAttribute = addAttribute(PayloadAttribute.class);
    final boolean hasOffsets = vector.hasOffsets();
    final boolean hasPayloads = vector.hasPayloads();
    final TermsEnum termsEnum = vector.iterator(null);
    BytesRef text;
    DocsAndPositionsEnum dpEnum = null;
    while((text = termsEnum.next()) != null) {
      dpEnum = termsEnum.docsAndPositions(null, dpEnum);
      assert dpEnum != null; // presumably checked by TokenSources.hasPositions earlier
      dpEnum.nextDoc();
      final int freq = dpEnum.freq();
      for (int j = 0; j < freq; j++) {
        int pos = dpEnum.nextPosition();
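
The excerpt cuts off inside the position loop, so here is a self-contained sketch of the whole pattern: a term vector is a one-document inverted index, which is why a single nextDoc() suffices before reading positions. The class name and field argument are illustrative:

  import java.io.IOException;

  import org.apache.lucene.index.DocsAndPositionsEnum;
  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.index.Terms;
  import org.apache.lucene.index.TermsEnum;
  import org.apache.lucene.util.BytesRef;

  public class VectorExample {
    static void dumpVector(IndexReader reader, int docId, String field) throws IOException {
      Terms vector = reader.getTermVector(docId, field); // null if no vector was stored
      if (vector == null) {
        return;
      }
      TermsEnum te = vector.iterator(null);
      DocsAndPositionsEnum dp = null;
      BytesRef text;
      while ((text = te.next()) != null) {
        dp = te.docsAndPositions(null, dp); // null if positions were not stored
        if (dp == null) {
          continue;
        }
        dp.nextDoc(); // the vector's single pseudo-document
        int freq = dp.freq();
        for (int i = 0; i < freq; i++) {
          int pos = dp.nextPosition();
          // startOffset()/endOffset() return -1 unless offsets were stored
          System.out.println(text.utf8ToString() + " pos=" + pos
              + " offsets=" + dp.startOffset() + "-" + dp.endOffset());
        }
      }
    }
  }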

Collecting highlight positions from a document's term vector: terms are matched against the query's term set, and the highlighter bails out if the vector stores no positions:


    final CharsRef spare = new CharsRef();
    final TermsEnum termsEnum = vector.iterator(null);
    DocsAndPositionsEnum dpEnum = null;
    BytesRef text;
   
    int numDocs = reader.maxDoc();
   
    while ((text = termsEnum.next()) != null) {
      UnicodeUtil.UTF8toUTF16(text, spare);
      final String term = spare.toString();
      if (!termSet.contains(term)) {
        continue;
      }
      dpEnum = termsEnum.docsAndPositions(null, dpEnum);
      if (dpEnum == null) {
        // null snippet
        return;
      }