Examples of UAX29URLEmailTokenizer


Examples of org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer

  @Deprecated
  public void testVersion40() throws Exception {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_40, reader);
        return new TokenStreamComponents(tokenizer);
      }
    };
    // U+061C is a new combining mark in 6.3, found using "[[\p{WB:Format}\p{WB:Extend}]&[^\p{Age:6.2}]]"
    // on the online UnicodeSet utility: <http://unicode.org/cldr/utility/list-unicodeset.jsp>
View Full Code Here

Examples of org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer

    maxTokenLength = getInt("maxTokenLength",
                            StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
  }

  public UAX29URLEmailTokenizer create(Reader input) {
    UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(input);
    tokenizer.setMaxTokenLength(maxTokenLength);
    return tokenizer;
  }
View Full Code Here

Examples of org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer

            /** Name reported by this tokenizer factory; always the literal {@code "uax_url_email"}. */
            @Override public String name() {
                return "uax_url_email";
            }

            @Override public Tokenizer create(Reader reader) {
                return new UAX29URLEmailTokenizer(reader);
            }
        }));

        tokenizerFactories.put("path_hierarchy", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
            @Override public String name() {
View Full Code Here

Examples of org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer

        super(index, indexSettings, name, settings);
        maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
    }

    @Override public Tokenizer create(Reader reader) {
        UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(reader);
        tokenizer.setMaxTokenLength(maxTokenLength);
        return tokenizer;
    }
View Full Code Here

Examples of org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer

    char whitespace[] = new char[4094];
    Arrays.fill(whitespace, ' ');
    sb.append(whitespace);
    sb.append("testing 1234");
    String input = sb.toString();
    UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
  }
View Full Code Here

Examples of org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer

    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents
        (String fieldName, Reader reader) {

        Tokenizer tokenizer = new UAX29URLEmailTokenizer(reader);
        return new TokenStreamComponents(tokenizer);
      }
    };
    checkOneTerm(a, "ざ", "さ"); // hiragana Bug
    checkOneTerm(a, "ザ", "ザ"); // katakana Works
View Full Code Here

Examples of org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer

                            StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
  }

  @Override
  public UAX29URLEmailTokenizer create(Reader input) {
    UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(luceneMatchVersion, input);
    tokenizer.setMaxTokenLength(maxTokenLength);
    return tokenizer;
  }
View Full Code Here

Examples of org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer

    char whitespace[] = new char[4094];
    Arrays.fill(whitespace, ' ');
    sb.append(whitespace);
    sb.append("testing 1234");
    String input = sb.toString();
    UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, newAttributeFactory(), new StringReader(input));
    BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
  }
View Full Code Here

Examples of org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer

    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents
        (String fieldName, Reader reader) {

        Tokenizer tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_3_1, reader);
        return new TokenStreamComponents(tokenizer);
      }
    };
    checkOneTerm(a, "ざ", "さ"); // hiragana Bug
    checkOneTerm(a, "ザ", "ザ"); // katakana Works
View Full Code Here

Examples of org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer

  @Deprecated
  public void testMailtoBackwards()  throws Exception {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_3_4, reader);
        return new TokenStreamComponents(tokenizer);
      }
    };
    assertAnalyzesTo(a, "mailto:test@example.org",
        new String[] { "mailto:test", "example.org" });
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.