Package org.apache.lucene.util.automaton

Examples of org.apache.lucene.util.automaton.Automaton


        System.out.println("TEST: got termsEnum=" + termsEnum);
      }
      BytesRef term;
      int ord = 0;

      Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton();   
      final TermsEnum termsEnum2 = terms.intersect(new CompiledAutomaton(automaton, false, false), null);

      while((term = termsEnum.next()) != null) {
        BytesRef term2 = termsEnum2.next();
        assertNotNull(term2);
View Full Code Here


        System.out.println("TEST: got termsEnum=" + termsEnum);
      }
      BytesRef term;
      int ord = 0;

      Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton();   
      final TermsEnum termsEnum2 = terms.intersect(new CompiledAutomaton(automaton, false, false), null);

      while((term = termsEnum.next()) != null) {
        BytesRef term2 = termsEnum2.next();
        assertNotNull(term2);
View Full Code Here

        }
        int termLength = termText.length;
        int prefixLength = Math.min(fq.getPrefixLength(), termLength);
        String suffix = UnicodeUtil.newString(termText, prefixLength, termText.length - prefixLength);
        LevenshteinAutomata builder = new LevenshteinAutomata(suffix, fq.getTranspositions());
        Automaton automaton = builder.toAutomaton(fq.getMaxEdits());
        if (prefixLength > 0) {
          Automaton prefix = BasicAutomata.makeString(UnicodeUtil.newString(termText, 0, prefixLength));
          automaton = BasicOperations.concatenate(prefix, automaton);
        }
        list.add(new CharacterRunAutomaton(automaton) {
          @Override
          public String toString() {
View Full Code Here

      }
    }
    final BytesRef utf8Key = new BytesRef(key);
    try {

      Automaton lookupAutomaton = toLookupAutomaton(key);

      final CharsRef spare = new CharsRef();

      //System.out.println("  now intersect exactFirst=" + exactFirst);
   
View Full Code Here

    return prefixPaths;
  }
 
  final Set<IntsRef> toFiniteStrings(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException {
    // Analyze surface form:
    Automaton automaton = null;
    TokenStream ts = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString());
    try {

      // Create corresponding automaton: labels are bytes
      // from each analyzed token, with byte 0 used as
View Full Code Here

  }

  final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
    // TODO: is there a Reader from a CharSequence?
    // Turn tokenstream into automaton:
    Automaton automaton = null;
    TokenStream ts = queryAnalyzer.tokenStream("", key.toString());
    try {
      automaton = (getTokenStreamToAutomaton()).toAutomaton(ts);
    } finally {
      IOUtils.closeWhileHandlingException(ts);
View Full Code Here

 
  // LUCENE-3849
  public void testStopwordsPosIncHole2() throws Exception {
    // use two stopfilters for testing here
    Directory dir = newDirectory();
    final Automaton secondSet = BasicAutomata.makeString("foobar");
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader);
        TokenStream stream = new MockTokenFilter(tokenizer, MockTokenFilter.ENGLISH_STOPSET);
View Full Code Here

      // From the random terms, pick some ratio and compile an
      // automaton:
      final Set<String> acceptTerms = new HashSet<>();
      final TreeSet<BytesRef> sortedAcceptTerms = new TreeSet<>();
      final double keepPct = random().nextDouble();
      Automaton a;
      if (iter == 0) {
        if (VERBOSE) {
          System.out.println("\nTEST: empty automaton");
        }
        a = BasicAutomata.makeEmpty();
      } else {
        if (VERBOSE) {
          System.out.println("\nTEST: keepPct=" + keepPct);
        }
        for (String s : terms) {
          final String s2;
          if (random().nextDouble() <= keepPct) {
            s2 = s;
          } else {
            s2 = getRandomString();
          }
          acceptTerms.add(s2);
          sortedAcceptTerms.add(new BytesRef(s2));
        }
        a = BasicAutomata.makeStringUnion(sortedAcceptTerms);
      }
     
      if (random().nextBoolean()) {
        if (VERBOSE) {
          System.out.println("TEST: reduce the automaton");
        }
        a.reduce();
      }

      final CompiledAutomaton c = new CompiledAutomaton(a, true, false);

      final BytesRef[] acceptTermsArray = new BytesRef[acceptTerms.size()];
      final Set<BytesRef> acceptTermsSet = new HashSet<>();
      int upto = 0;
      for(String s : acceptTerms) {
        final BytesRef b = new BytesRef(s);
        acceptTermsArray[upto++] = b;
        acceptTermsSet.add(b);
        assertTrue(accepts(c, b));
      }
      Arrays.sort(acceptTermsArray);

      if (VERBOSE) {
        System.out.println("\nTEST: accept terms (unicode order):");
        for(BytesRef t : acceptTermsArray) {
          System.out.println("  " + t.utf8ToString() + (termsSet.contains(t) ? " (exists)" : ""));
        }
        System.out.println(a.toDot());
      }

      for(int iter2=0;iter2<100;iter2++) {
        final BytesRef startTerm = acceptTermsArray.length == 0 || random().nextBoolean() ? null : acceptTermsArray[random().nextInt(acceptTermsArray.length)];
View Full Code Here

    w.forceMerge(1);
    DirectoryReader r = w.getReader();
    w.close();
    AtomicReader sub = getOnlySegmentReader(r);
    Terms terms = sub.fields().terms("field");
    Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton();   
    CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);   
    TermsEnum te = terms.intersect(ca, null);
    assertEquals("aaa", te.next().utf8ToString());
    assertEquals(0, te.docs(null, null, DocsEnum.FLAG_NONE).nextDoc());
    assertEquals("bbb", te.next().utf8ToString());
View Full Code Here

    DirectoryReader r = w.getReader();
    w.close();
    AtomicReader sub = getOnlySegmentReader(r);
    Terms terms = sub.fields().terms("field");

    Automaton automaton = new RegExp(".*d", RegExp.NONE).toAutomaton();
    CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);   
    TermsEnum te;
   
    // should seek to startTerm
    te = terms.intersect(ca, new BytesRef("aad"));
View Full Code Here

TOP

Related Classes of org.apache.lucene.util.automaton.Automaton

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.