Examples of UAX29URLEmailTokenizer


Examples of org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer

    maxTokenLength = getInt("maxTokenLength",
                            StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
  }

  public UAX29URLEmailTokenizer create(Reader input) {
    UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(luceneMatchVersion, input);
    tokenizer.setMaxTokenLength(maxTokenLength);
    return tokenizer;
  }
View Full Code Here

Examples of org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer

    char whitespace[] = new char[4094];
    Arrays.fill(whitespace, ' ');
    sb.append(whitespace);
    sb.append("testing 1234");
    String input = sb.toString();
    UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(new StringReader(input));
    BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
  }
View Full Code Here

Examples of org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer

    char whitespace[] = new char[4094];
    Arrays.fill(whitespace, ' ');
    sb.append(whitespace);
    sb.append("testing 1234");
    String input = sb.toString();
    UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
  }
View Full Code Here

Examples of org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer

    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents
        (String fieldName, Reader reader) {

        Tokenizer tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_31, reader);
        return new TokenStreamComponents(tokenizer);
      }
    };
    checkOneTerm(a, "ざ", "さ"); // hiragana Bug
    checkOneTerm(a, "ザ", "ザ"); // katakana Works
View Full Code Here

Examples of org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer

  @Deprecated
  public void testMailtoBackwards()  throws Exception {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_34, reader);
        return new TokenStreamComponents(tokenizer);
      }
    };
    assertAnalyzesTo(a, "mailto:test@example.org",
        new String[] { "mailto:test", "example.org" });
View Full Code Here

Examples of org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer

  @Deprecated
  public void testVersion36() throws Exception {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_36, reader);
        return new TokenStreamComponents(tokenizer);
      }
    };
    assertAnalyzesTo(a, "this is just a t\u08E6st lucene@apache.org", // new combining mark in 6.1
        new String[] { "this", "is", "just", "a", "t", "st", "lucene@apache.org" });
View Full Code Here

Examples of org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer

    char whitespace[] = new char[4094];
    Arrays.fill(whitespace, ' ');
    sb.append(whitespace);
    sb.append("testing 1234");
    String input = sb.toString();
    UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
  }
View Full Code Here

Examples of org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer

    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents
        (String fieldName, Reader reader) {

        Tokenizer tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_31, reader);
        return new TokenStreamComponents(tokenizer);
      }
    };
    checkOneTerm(a, "ざ", "さ"); // hiragana Bug
    checkOneTerm(a, "ザ", "ザ"); // katakana Works
View Full Code Here

Examples of org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer

  @Deprecated
  public void testMailtoBackwards()  throws Exception {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_34, reader);
        return new TokenStreamComponents(tokenizer);
      }
    };
    assertAnalyzesTo(a, "mailto:test@example.org",
        new String[] { "mailto:test", "example.org" });
View Full Code Here

Examples of org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer

  @Deprecated
  public void testVersion36() throws Exception {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_36, reader);
        return new TokenStreamComponents(tokenizer);
      }
    };
    assertAnalyzesTo(a, "this is just a t\u08E6st lucene@apache.org", // new combining mark in 6.1
        new String[] { "this", "is", "just", "a", "t", "st", "lucene@apache.org" });
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.