Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.Tokenizer


  public void test() throws Exception {
    Directory dir = newDirectory();
    Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader);
        if (fieldName.contains("payloadsFixed")) {
          TokenFilter filter = new MockFixedLengthPayloadFilter(new Random(0), tokenizer, 1);
          return new TokenStreamComponents(tokenizer, filter);
        } else if (fieldName.contains("payloadsVariable")) {
          TokenFilter filter = new MockVariableLengthPayloadFilter(new Random(0), tokenizer);
View Full Code Here


 
  public void testEmptyTerm() throws IOException {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new CodepointCountFilter(TEST_VERSION_CURRENT, tokenizer, 0, 5));
      }
    };
    checkOneTerm(a, "", "");
  }
View Full Code Here

    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents
        (String fieldName, Reader reader) {

        Tokenizer tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_31, reader);
        return new TokenStreamComponents(tokenizer);
      }
    };
    checkOneTerm(a, "ざ", "さ"); // hiragana Bug
    checkOneTerm(a, "ザ", "ザ"); // katakana Works
View Full Code Here

  @Deprecated
  public void testMailtoBackwards()  throws Exception {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_34, reader);
        return new TokenStreamComponents(tokenizer);
      }
    };
    assertAnalyzesTo(a, "mailto:test@example.org",
        new String[] { "mailto:test", "example.org" });
View Full Code Here

  @Deprecated
  public void testVersion36() throws Exception {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_36, reader);
        return new TokenStreamComponents(tokenizer);
      }
    };
    assertAnalyzesTo(a, "this is just a t\u08E6st lucene@apache.org", // new combining mark in 6.1
        new String[] { "this", "is", "just", "a", "t", "st", "lucene@apache.org" });
View Full Code Here

   
    Analyzer a = new Analyzer() {

      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        TokenStream stream = new KeepWordFilter(TEST_VERSION_CURRENT, tokenizer, new CharArraySet(TEST_VERSION_CURRENT, words, true));
        return new TokenStreamComponents(tokenizer, stream);
      }
    };
   
View Full Code Here

   *         {@link CzechStemFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(matchVersion, source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter( matchVersion, result, stopwords);
    if (matchVersion.onOrAfter(Version.LUCENE_31)) {
      if(!this.stemExclusionTable.isEmpty())
View Full Code Here

   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    if (matchVersion.onOrAfter(Version.LUCENE_31)) {
      final Tokenizer source = new StandardTokenizer(matchVersion, reader);
      TokenStream result = new StandardFilter(matchVersion, source);
      result = new ElisionFilter(result, DEFAULT_ARTICLES);
      result = new LowerCaseFilter(matchVersion, result);
      result = new StopFilter(matchVersion, result, stopwords);
      if(!excltable.isEmpty())
        result = new SetKeywordMarkerFilter(result, excltable);
      if (matchVersion.onOrAfter(Version.LUCENE_36)) {
        result = new FrenchLightStemFilter(result);
      } else {
        result = new SnowballFilter(result, new org.tartarus.snowball.ext.FrenchStemmer());
      }
      return new TokenStreamComponents(source, result);
    } else {
      final Tokenizer source = new StandardTokenizer(matchVersion, reader);
      TokenStream result = new StandardFilter(matchVersion, source);
      result = new StopFilter(matchVersion, result, stopwords);
      if(!excltable.isEmpty())
        result = new SetKeywordMarkerFilter(result, excltable);
      result = new FrenchStemFilter(result);
View Full Code Here

  public void testRandomStrings() throws Exception {
    Analyzer a = new Analyzer() {

      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, new ASCIIFoldingFilter(tokenizer));
      }
    };
    checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
  }
View Full Code Here

 
  public void testEmptyTerm() throws IOException {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new ASCIIFoldingFilter(tokenizer));
      }
    };
    checkOneTerm(a, "", "");
  }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.Tokenizer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.