Package org.apache.lucene.util.automaton

Examples of org.apache.lucene.util.automaton.Automaton


 
  // LUCENE-3849
  public void testStopwordsPosIncHole2() throws Exception {
    // use two stopfilters for testing here
    Directory dir = newDirectory();
    final Automaton secondSet = BasicAutomata.makeString("foobar");
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader);
        TokenStream stream = new MockTokenFilter(tokenizer, MockTokenFilter.ENGLISH_STOPSET);
View Full Code Here


      }
    } else if (clazz.equals("lucene.WildcardQuery")) {
      WildcardQuery wq = (WildcardQuery)q;
      Term t = wq.getTerm();
      setString(n, "text", getString(n, "text") + ", term=" + t);
      Automaton a = WildcardQuery.toAutomaton(t);
      addAutomaton(n, a);
    } else if (clazz.equals("lucene.TermRangeQuery")) {
      TermRangeQuery rq = (TermRangeQuery)q;
      setString(n, "text", getString(n, "text") + ", inclLower=" + rq.includesLower() + ", inclUpper=" + rq.includesUpper());
      Object n1 = create("node");
      setString(n1, "text", "lowerTerm=" + rq.getField() + ":" + rq.getLowerTerm() + "'");
      add(n, n1);
      n1 = create("node");
      setString(n1, "text", "upperTerm=" + rq.getField() + ":" + rq.getUpperTerm() + "'");
      add(n, n1);
      try {
        addTermsEnum(n, TermRangeQuery.class, rq.getField(), rq);
      } catch (Exception e) {
        e.printStackTrace();
        n1 = create("node");
        setString(n1, "text", "TermEnum: Exception " + e.getMessage());
        add(n, n1);
      }
    } else if (q instanceof AutomatonQuery) {
      AutomatonQuery aq = (AutomatonQuery)q;
      setString(n, "text", getString(n, "text") + ", " + aq.toString());
      // get automaton
      try {
        java.lang.reflect.Field aField = AutomatonQuery.class.getDeclaredField("automaton");
        aField.setAccessible(true);
        Automaton a = (Automaton)aField.get(aq);
        addAutomaton(n, a);
      } catch (Exception e) {
        e.printStackTrace();
        Object n1 = create("node");
        setString(n1, "text", "Automaton: Exception " + e.getMessage());
View Full Code Here

        }

        try {
            List<Term> terms = new ArrayList<Term>();
            Terms t = MultiFields.getTerms(reader, FieldNames.FULLTEXT);
            Automaton a = WildcardQuery.toAutomaton(newFulltextTerm(token));
            CompiledAutomaton ca = new CompiledAutomaton(a);
            TermsEnum te = ca.getTermsEnum(t);
            BytesRef text;
            while ((text = te.next()) != null) {
                terms.add(newFulltextTerm(text.utf8ToString()));
View Full Code Here

        }

        try {
            List<Term> terms = new ArrayList<Term>();
            Terms t = MultiFields.getTerms(reader, FieldNames.FULLTEXT);
            Automaton a = WildcardQuery.toAutomaton(newFulltextTerm(token));
            CompiledAutomaton ca = new CompiledAutomaton(a);
            TermsEnum te = ca.getTermsEnum(t);
            BytesRef text;
            while ((text = te.next()) != null) {
                terms.add(newFulltextTerm(text.utf8ToString()));
View Full Code Here

      // NOTE: not great that we ask the suggester to give
      // us the "answer key" (ie maybe we have a bug in
      // suggester.toLevA ...) ... but testRandom2() fixes
      // this:
      Automaton automaton = suggester.convertAutomaton(suggester.toLevenshteinAutomata(suggester.toLookupAutomaton(analyzedKey)));
      assertTrue(automaton.isDeterministic());
      // TODO: could be faster... but its slowCompletor for a reason
      BytesRef spare = new BytesRef();
      for (TermFreqPayload2 e : slowCompletor) {
        spare.copyChars(e.analyzedForm);
        Set<IntsRef> finiteStrings = suggester.toFiniteStrings(spare, tokenStreamToAutomaton);
        for (IntsRef intsRef : finiteStrings) {
          State p = automaton.getInitialState();
          BytesRef ref = Util.toBytesRef(intsRef, spare);
          boolean added = false;
          for (int i = ref.offset; i < ref.length; i++) {
            State q = p.step(ref.bytes[i] & 0xff);
            if (q == null) {
View Full Code Here

        System.out.println("TEST: got termsEnum=" + termsEnum);
      }
      BytesRef term;
      int ord = 0;

      Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton();   
      final TermsEnum termsEnum2 = terms.intersect(new CompiledAutomaton(automaton, false, false), null);

      while((term = termsEnum.next()) != null) {
        BytesRef term2 = termsEnum2.next();
        assertNotNull(term2);
View Full Code Here

        maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
      LevenshteinAutomata builder =
        new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength), transpositions);

      for (int i = runAutomata.size(); i <= maxDistance; i++) {
        Automaton a = builder.toAutomaton(i);
        //System.out.println("compute automaton n=" + i);
        // constant prefix
        if (realPrefixLength > 0) {
          Automaton prefix = BasicAutomata.makeString(
            UnicodeUtil.newString(termText, 0, realPrefixLength));
          a = BasicOperations.concatenate(prefix, a);
        }
        runAutomata.add(new CompiledAutomaton(a, true, false));
      }
View Full Code Here

   * determinized)
   */
  public void testNFA() throws IOException {
    // accept this or three, the union is an NFA (two transitions for 't' from
    // initial state)
    Automaton nfa = BasicOperations.union(BasicAutomata.makeString("this"),
        BasicAutomata.makeString("three"));
    assertAutomatonHits(2, nfa);
  }
View Full Code Here

  /**
   * Test that rewriting to a prefix query works as expected, preserves
   * MultiTermQuery semantics.
   */
  public void testRewritePrefix() throws IOException {
    Automaton pfx = BasicAutomata.makeString("do");
    pfx.expandSingleton(); // expand singleton representation for testing
    Automaton prefixAutomaton = BasicOperations.concatenate(pfx, BasicAutomata
        .makeAnyString());
    AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), prefixAutomaton);
    Terms terms = MultiFields.getTerms(searcher.getIndexReader(), FN);
    assertTrue(aq.getTermsEnum(terms) instanceof PrefixTermsEnum);
    assertEquals(3, automatonQueryNrHits(aq));
View Full Code Here

    // factor is appropriate (eg, say a fuzzy match must be at
    // least 2X better weight than the non-fuzzy match to
    // "compete") ... in which case I think the wFST needs
    // to be log weights or something ...

    Automaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton));
    /*
      Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8");
      w.write(levA.toDot());
      w.close();
      System.out.println("Wrote LevA to out.dot");
View Full Code Here

TOP

Related Classes of org.apache.lucene.util.automaton.Automaton

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.