Package org.apache.lucene.util.automaton

Examples of org.apache.lucene.util.automaton.Automaton
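The excerpts on this page exercise the pre-5.0 automaton API (Lucene 3.x/4.x): BasicAutomata builds primitive automata, BasicOperations combines and compares them, RegExp compiles regular expressions, and CharacterRunAutomaton compiles an automaton into a fast character-level matcher. The following is a minimal, self-contained sketch of that core API; it is not taken from any of the excerpts below, and the class name and the Lucene 4.x assumption are mine.

    import org.apache.lucene.util.automaton.Automaton;
    import org.apache.lucene.util.automaton.BasicAutomata;
    import org.apache.lucene.util.automaton.BasicOperations;
    import org.apache.lucene.util.automaton.CharacterRunAutomaton;
    import org.apache.lucene.util.automaton.RegExp;

    public class AutomatonBasics {
      public static void main(String[] args) {
        // Primitive automata for two literal strings.
        Automaton abc = BasicAutomata.makeString("abc");
        Automaton xyz = BasicAutomata.makeString("xyz");

        // The union accepts either string.
        Automaton union = BasicOperations.union(abc, xyz);

        // The same language written as a regular expression.
        Automaton re = new RegExp("abc|xyz", RegExp.NONE).toAutomaton();

        // sameLanguage compares the accepted languages, not the state graphs.
        System.out.println(BasicOperations.sameLanguage(union, re)); // true

        // CharacterRunAutomaton compiles the automaton into a fast matcher.
        CharacterRunAutomaton matcher = new CharacterRunAutomaton(union);
        System.out.println(matcher.run("abc")); // true
        System.out.println(matcher.run("abx")); // false
      }
    }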


    // "xyz" (pos 0), a hole (pos 1) and "def" (pos 2) form one path; "abc" (posLength=3) spans all three
    final TokenStream ts = new CannedTokenStream(
      new Token[] {
        token("xyz", 1, 1),
        token("abc", 0, 3),
        token("def", 2, 1),
      });
    final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
    final Automaton expected = BasicOperations.union(
                                                     join(s2a("xyz"), SEP_A, HOLE_A, SEP_A, s2a("def")),
                                                     BasicAutomata.makeString("abc"));
    assertTrue(BasicOperations.sameLanguage(expected, actual));
  }
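The expected automata in these TokenStreamToAutomaton tests are assembled from helpers that the excerpts never show: token(...), s2a(...), join(...), SEP_A and HOLE_A. A hedged reconstruction of how such helpers are plausibly defined inside the test class follows; the names and exact bodies are assumptions, and POS_SEP and HOLE are the labels TokenStreamToAutomaton uses between token positions and for position holes.

      // Hypothetical reconstruction of the test helpers referenced in these excerpts.
      private static final Automaton SEP_A  = BasicAutomata.makeChar(TokenStreamToAutomaton.POS_SEP);
      private static final Automaton HOLE_A = BasicAutomata.makeChar(TokenStreamToAutomaton.HOLE);

      // A token with an explicit position increment and position length.
      private static Token token(String term, int posInc, int posLength) {
        final Token t = new Token(term, 0, term.length());
        t.setPositionIncrement(posInc);
        t.setPositionLength(posLength);
        return t;
      }

      // Automaton accepting exactly one string.
      private static Automaton s2a(String s) {
        return BasicAutomata.makeString(s);
      }

      // Concatenate automata: one accepted path through the token lattice.
      private static Automaton join(Automaton... as) {
        return BasicOperations.concatenate(Arrays.asList(as));
      }

      // Convenience overload: join literal tokens with the position separator between them.
      private static Automaton join(String... strings) {
        final List<Automaton> parts = new ArrayList<Automaton>();
        for (int i = 0; i < strings.length; i++) {
          if (i > 0) {
            parts.add(SEP_A);
          }
          parts.add(s2a(strings[i]));
        }
        return BasicOperations.concatenate(parts);
      }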


        token("abc", 1, 1),
        token("xyz", 0, 3),
        token("def", 1, 1),
        token("ghi", 1, 1),
      });
    final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
    final Automaton a1 = BasicAutomata.makeString("xyz");
    final Automaton a2 = join("abc", "def", "ghi");
    final Automaton expected = BasicOperations.union(a1, a2);
    //toDot(actual);
    assertTrue(BasicOperations.sameLanguage(expected, actual));
  }

  public void testStartsWithHole() throws Exception {
    final TokenStream ts = new CannedTokenStream(
      new Token[] {
        token("abc", 2, 1),
      });
    final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
    final Automaton expected = join(HOLE_A, SEP_A, s2a("abc"));
    //toDot(actual);
    assertTrue(BasicOperations.sameLanguage(expected, actual));
  }

    // "a" and "X" start at the same position; with no later tokens the result
    // is simply the union of the two terms
    final TokenStream ts = new CannedTokenStream(
      new Token[] {
        token("a", 1, 1),
        token("X", 0, 10),
      });
    final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
    final Automaton expected = BasicOperations.union(BasicAutomata.makeString("a"),
                                                     BasicAutomata.makeString("X"));
    assertTrue(BasicOperations.sameLanguage(expected, actual));
  }

        System.out.println("TEST: got termsEnum=" + termsEnum);
      }
      BytesRef term;
      int ord = 0;

      // Intersect the terms dictionary with a match-everything automaton;
      // the intersected enum should visit exactly the same terms.
      Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton();
      final TermsEnum termsEnum2 = terms.intersect(new CompiledAutomaton(automaton, false, false), null);

      while((term = termsEnum.next()) != null) {
        BytesRef term2 = termsEnum2.next();
        assertNotNull(term2);
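The excerpt above walks a plain TermsEnum in parallel with Terms.intersect driven by a match-everything regular expression, asserting that both enumerations return the same terms. The same intersection mechanism is how you would enumerate only the terms matching a selective pattern. Below is a hedged sketch of that pattern; the Lucene 4.x API is assumed, and the class name, method name and the reader/field arguments are placeholders of mine.

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.MultiFields;
    import org.apache.lucene.index.Terms;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.automaton.Automaton;
    import org.apache.lucene.util.automaton.CompiledAutomaton;
    import org.apache.lucene.util.automaton.RegExp;

    public class RegexpTermsDump {
      // Print every term of the given field that matches the regular expression,
      // letting the terms dictionary skip non-matching regions instead of
      // filtering a full enumeration term by term.
      static void printMatchingTerms(IndexReader reader, String field, String regex) throws IOException {
        Terms terms = MultiFields.getTerms(reader, field);
        if (terms == null) {
          return; // field has no indexed terms
        }
        Automaton automaton = new RegExp(regex, RegExp.NONE).toAutomaton();
        // Same (finite=false, simplify=false) flags as the excerpt above.
        CompiledAutomaton compiled = new CompiledAutomaton(automaton, false, false);
        TermsEnum te = terms.intersect(compiled, null); // null: start from the first matching term
        BytesRef term;
        while ((term = te.next()) != null) {
          System.out.println(term.utf8ToString() + " docFreq=" + te.docFreq());
        }
      }
    }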

 
  // LUCENE-3849
  public void testStopwordsPosIncHole2() throws Exception {
    // use two stopfilters for testing here
    Directory dir = newDirectory();
    final Automaton secondSet = BasicAutomata.makeString("foobar");
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader);
        TokenStream stream = new MockTokenFilter(tokenizer, MockTokenFilter.ENGLISH_STOPSET);
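The excerpt above (from the LUCENE-3849 test) is cut off inside createComponents. Its point is that stop filters remove tokens but leave position-increment holes, which the automaton built from the token stream must preserve. The following is a hedged sketch of an analyzer of the same shape, with a second stop set built from the secondSet automaton; it is an illustration, not the actual continuation of the test.

        // Sketch: two MockTokenFilters, the second driven by an Automaton wrapped
        // in a CharacterRunAutomaton so that it drops the literal token "foobar".
        final Automaton secondSet = BasicAutomata.makeString("foobar");
        Analyzer a = new Analyzer() {
          @Override
          protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new MockTokenizer(reader);
            TokenStream stream = new MockTokenFilter(tokenizer, MockTokenFilter.ENGLISH_STOPSET);
            stream = new MockTokenFilter(stream, new CharacterRunAutomaton(secondSet));
            return new TokenStreamComponents(tokenizer, stream);
          }
        };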

      // NOTE: not great that we ask the suggester to give
      // us the "answer key" (ie maybe we have a bug in
      // suggester.toLevA ...) ... but testRandom2() fixes
      // this:
      Automaton automaton = suggester.convertAutomaton(suggester.toLevenshteinAutomata(suggester.toLookupAutomaton(analyzedKey)));
      assertTrue(automaton.isDeterministic());
      // TODO: could be faster... but it's slowCompletor for a reason
      BytesRef spare = new BytesRef();
      for (TermFreq2 e : slowCompletor) {
        spare.copyChars(e.analyzedForm);
        Set<IntsRef> finiteStrings = suggester.toFiniteStrings(spare, tokenStreamToAutomaton);
        for (IntsRef intsRef : finiteStrings) {
          State p = automaton.getInitialState();
          BytesRef ref = Util.toBytesRef(intsRef, spare);
          boolean added = false;
          for (int i = ref.offset; i < ref.length; i++) {
            State q = p.step(ref.bytes[i] & 0xff);
            if (q == null) {

        System.out.println("TEST: got termsEnum=" + termsEnum);
      }
      BytesRef term;
      int ord = 0;

      Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton();   
      final TermsEnum termsEnum2 = terms.intersect(new CompiledAutomaton(automaton, false, false), null);

      while((term = termsEnum.next()) != null) {
        BytesRef term2 = termsEnum2.next();
        assertNotNull(term2);
View Full Code Here

    //System.out.println("lookup key=" + key + " num=" + num);
    final BytesRef utf8Key = new BytesRef(key);
    try {

      Automaton lookupAutomaton = toLookupAutomaton(key);

      final CharsRef spare = new CharsRef();

      //System.out.println("  now intersect exactFirst=" + exactFirst);
   

    TokenStream ts = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString());

    // Create corresponding automaton: labels are bytes
    // from each analyzed token, with byte 0 used as
    // separator between tokens:
    Automaton automaton = ts2a.toAutomaton(ts);
    ts.close();

    replaceSep(automaton);

    assert SpecialOperations.isFinite(automaton);
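The comment in the excerpt above describes the encoding TokenStreamToAutomaton produces: the bytes of each analyzed token become transition labels, with a reserved separator label between consecutive token positions (replaceSep later rewrites that separator for the suggester's own purposes). The following hedged sketch makes the encoding visible by enumerating the accepted paths of a small, finite token-stream automaton; it reuses the hypothetical token(...) helper sketched earlier, and SpecialOperations and IntsRef are the same utilities used elsewhere in these excerpts.

      // Sketch (Lucene 4.x): list the label paths accepted by a token-stream automaton.
      public void testShowSeparatorEncoding() throws Exception {
        final TokenStream ts = new CannedTokenStream(
          new Token[] {
            token("wi", 1, 1),
            token("fi", 1, 1),
          });
        final Automaton automaton = new TokenStreamToAutomaton().toAutomaton(ts);

        // A finite token stream yields an acyclic automaton, so every accepted
        // path can be enumerated.
        assert SpecialOperations.isFinite(automaton);
        final Set<IntsRef> paths = SpecialOperations.getFiniteStrings(automaton, -1); // -1: no limit
        for (IntsRef path : paths) {
          // One accepted path: the bytes of "wi", then the separator label
          // (TokenStreamToAutomaton.POS_SEP), then the bytes of "fi".
          System.out.println(path);
        }
      }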
