Package org.sindice.siren.search.node

Examples of org.sindice.siren.search.node.NodeRegexpQuery

The supported syntax is documented in the {@link RegExp} class. Note that this syntax might differ from other regular expression implementations. For some alternatives with different syntax, look under the sandbox.

Note that this query can be slow, as it needs to iterate over many terms. To prevent extremely slow RegexpQueries, a Regexp term should not start with the expression ".*".

Code taken from {@link RegexpQuery} and adapted for SIREn.


  /**
   * Exercises the {@code /regexp/} keyword syntax: each call to
   * {@code _assertSirenQuery} pairs an expected, hand-built query with a
   * keyword query string and the current configuration map.
   *
   * <p>NOTE(review): {@code _assertSirenQuery} is not visible in this excerpt;
   * presumably it parses the string with the given configuration and compares
   * the result with the expected query -- confirm against the helper.
   *
   * <p>The {@code config} map is mutated as the test progresses, so the order
   * of the statements matters: every assertion runs with all the settings put
   * into the map before it.
   *
   * @throws Exception propagated from the assertion helper
   */
  public void testRegexps() throws Exception {
    final HashMap<ConfigurationKey, Object> config = new HashMap<ConfigurationKey, Object>();
    // Twig syntax is switched off for every assertion of this test.
    config.put(KeywordConfigurationKeys.ALLOW_TWIG, false);

    final String df = SirenTestCase.DEFAULT_TEST_FIELD;
    // Basic case: character classes inside a /.../ regexp literal.
    final NodeRegexpQuery q = new NodeRegexpQuery(new Term(SirenTestCase.DEFAULT_TEST_FIELD, "[a-z][123]"));
    this._assertSirenQuery(config, q, "/[a-z][123]/");
    // With LOWERCASE_EXPANDED_TERMS enabled, the upper-case pattern is
    // asserted against the same lower-case expected query q.
    config.put(ConfigurationKeys.LOWERCASE_EXPANDED_TERMS, true);
    this._assertSirenQuery(config, q, "/[A-Z][123]/");
    // A ^0.5 suffix on the regexp is asserted against a boost of 0.5 on q.
    q.setBoost(0.5f);
    this._assertSirenQuery(config, q, "/[A-Z][123]/^0.5");
    // The rewrite method is set both on the expected query and in the
    // configuration before asserting again.
    q.setRewriteMethod(MultiNodeTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
    config.put(KeywordConfigurationKeys.MULTI_NODE_TERM_REWRITE_METHOD, MultiNodeTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
    this._assertSirenQuery(config, q, "/[A-Z][123]/^0.5");
    // The remaining assertions run with CONSTANT_SCORE_AUTO_REWRITE_DEFAULT.
    config.put(KeywordConfigurationKeys.MULTI_NODE_TERM_REWRITE_METHOD, MultiNodeTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);

    // Escaped '/' and '*' inside the regexp: the expected term text keeps the
    // backslash.
    final Query escaped = new NodeRegexpQuery(new Term(df, "[a-z]\\/[123]"));
    this._assertSirenQuery(config, escaped, "/[a-z]\\/[123]/");
    final Query escaped2 = new NodeRegexpQuery(new Term(df, "[a-z]\\*[123]"));
    this._assertSirenQuery(config, escaped2, "/[a-z]\\*[123]/");

    // Register a whitespace analyzer for the XSD string datatype, then mix a
    // regexp clause with quoted terms that themselves contain slashes.
    final HashMap<String, Analyzer> dtAnalyzers = new HashMap<String, Analyzer>();
    dtAnalyzers.put(XSDDatatype.XSD_STRING, new WhitespaceAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
    config.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, dtAnalyzers);
    final NodeBooleanQuery complex = new NodeBooleanQuery();
    complex.add(new NodeRegexpQuery(new Term(df, "[a-z]\\/[123]")), NodeBooleanClause.Occur.MUST);
    complex.add(new NodeTermQuery(new Term(df, "/etc/init.d/")), Occur.MUST);
    complex.add(new NodeTermQuery(new Term(df, "/etc/init[.]d/lucene/")), Occur.SHOULD);
    this._assertSirenQuery(config, complex, "+/[a-z]\\/[123]/ +\"/etc/init.d/\" OR \"/etc\\/init\\[.\\]d/lucene/\" ");

    // Characters such as '.*' and '~0.5' placed inside the slashes are
    // expected to end up verbatim in the regexp term.
    Query re = new NodeRegexpQuery(new Term(df, "http.*"));
    this._assertSirenQuery(config, re, "/http.*/");

    re = new NodeRegexpQuery(new Term(df, "http~0.5"));
    this._assertSirenQuery(config, re, "/http~0.5/");

    re = new NodeRegexpQuery(new Term(df, "boo"));
    this._assertSirenQuery(config, re, "/boo/");

    // Quoted or backslash-escaped slashes are asserted against a plain term
    // query on "/boo/", not a regexp query.
    this._assertSirenQuery(config, new NodeTermQuery(new Term(df, "/boo/")), "\"/boo/\"");
    this._assertSirenQuery(config, new NodeTermQuery(new Term(df, "/boo/")), "\\/boo\\/");

    // With OR as default operator, two adjacent regexps are asserted against
    // two SHOULD clauses.
    config.put(ConfigurationKeys.DEFAULT_OPERATOR, Operator.OR);
    final NodeBooleanQuery two = new NodeBooleanQuery();
    two.add(new NodeRegexpQuery(new Term(df, "foo")), Occur.SHOULD);
    two.add(new NodeRegexpQuery(new Term(df, "bar")), Occur.SHOULD);
    this._assertSirenQuery(config, two, "/foo/ /bar/");

    // A '?' quantifier inside the regexp literal.
    final NodeRegexpQuery regexpQueryexp = new NodeRegexpQuery(new Term(df, "[abc]?[0-9]"));
    this._assertSirenQuery(config, regexpQueryexp, "/[abc]?[0-9]/");
  }
View Full Code Here


  @Test
  public void testRegexQueries()
  throws Exception {
    final Query reg = new LuceneProxyNodeQuery(
      new NodeRegexpQuery(new Term(SirenTestCase.DEFAULT_TEST_FIELD, "s*e"))
    );
    this._assertSirenQuery(reg, "/s*e/");
  }
View Full Code Here

  private Term newTerm(final String value) {
    return new Term(DEFAULT_TEST_FIELD, value);
  }

  private int regexQueryNrHits(final String regex) throws IOException {
    final NodeRegexpQuery query = new NodeRegexpQuery(this.newTerm(regex));
    return searcher.search(dq(query), 5).totalHits;
  }
View Full Code Here

      public Automaton getAutomaton(final String name) {
        if (name.equals("quickBrown")) return quickBrownAutomaton;
        else return null;
      }
    };
    final NodeRegexpQuery query = new NodeRegexpQuery(this.newTerm("<quickBrown>"),
      RegExp.ALL, myProvider);
    assertEquals(1, searcher.search(dq(query), 5).totalHits);
  }
View Full Code Here

  }

  public NodeQuery build(final QueryNode queryNode) throws QueryNodeException {
    final RegexpQueryNode regexpNode = (RegexpQueryNode) queryNode;

    final NodeRegexpQuery q = new NodeRegexpQuery(new Term(regexpNode.getFieldAsString(),
        regexpNode.textToBytesRef()));

    final MultiNodeTermQuery.RewriteMethod method = (MultiNodeTermQuery.RewriteMethod) queryNode
        .getTag(MultiNodeTermRewriteMethodProcessor.TAG_ID);
    if (method != null) {
      q.setRewriteMethod(method);
    }
    return q;
  }
View Full Code Here

TOP

Related Classes of org.sindice.siren.search.node.NodeRegexpQuery

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.