Package org.apache.lucene.analysis.en

Examples of org.apache.lucene.analysis.en.PorterStemFilter


    Tokenizer tokenizer = new SentenceTokenizer(reader);
    TokenStream result = new WordTokenFilter(tokenizer);
    // result = new LowerCaseFilter(result);
    // LowerCaseFilter is not needed, as SegTokenFilter lowercases Basic Latin text.
    // The porter stemming is too strict, this is not a bug, this is a feature:)
    result = new PorterStemFilter(result);
    if (!stopWords.isEmpty()) {
      result = new StopFilter(matchVersion, result, stopWords);
    }
    return new TokenStreamComponents(tokenizer, result);
  }
View Full Code Here


    // the override filter will convert "booked" to "books",
    // but also mark it with KeywordAttribute so Porter will not change it.
    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder();
    builder.add("booked", "books");
    Tokenizer tokenizer = new KeywordTokenizer(new StringReader("booked"));
    TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
        tokenizer, builder.build()));
    assertTokenStreamContents(stream, new String[] {"books"});
  }
View Full Code Here

    // the override filter will convert "booked" to "books",
    // but also mark it with KeywordAttribute so Porter will not change it.
    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(true);
    builder.add("boOkEd", "books");
    Tokenizer tokenizer = new KeywordTokenizer(new StringReader("BooKeD"));
    TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
        tokenizer, builder.build()));
    assertTokenStreamContents(stream, new String[] {"books"});
  }
View Full Code Here

  }

  public void testNoOverrides() throws IOException {
    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(true);
    Tokenizer tokenizer = new KeywordTokenizer(new StringReader("book"));
    TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
        tokenizer, builder.build()));
    assertTokenStreamContents(stream, new String[] {"book"});
  }
View Full Code Here

        output.add(entry.getValue());
      }
    }
    Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
        new StringReader(input.toString()));
    TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
        tokenizer, builder.build()));
    assertTokenStreamContents(stream, output.toArray(new String[0]));
  }
View Full Code Here

    StemmerOverrideMap build = builder.build();
    for (Entry<String,String> entry : entrySet) {
      if (random().nextBoolean()) {
        Tokenizer tokenizer = new KeywordTokenizer(new StringReader(
            entry.getKey()));
        TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
            tokenizer, build));
        assertTokenStreamContents(stream, new String[] {entry.getValue()});
      }
    }
  }
View Full Code Here

  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final StandardTokenizer src = new StandardTokenizer(version, reader);
      TokenStream tok = new StandardFilter(version, src);
      tok = new LowerCaseFilter(version, tok);
      tok = new StopFilter(version, tok, StandardAnalyzer.STOP_WORDS_SET);
      tok = new PorterStemFilter(tok);
      return new TokenStreamComponents(src, tok);
  }
View Full Code Here

*
*/
public class PorterStemFilterFactory extends TokenFilterFactory {
  @Override
  public PorterStemFilter create(TokenStream input) {
    return new PorterStemFilter(input);
  }
View Full Code Here

    args.put("dictionary", "stemdict.txt");
    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
    factory.init(args);
    factory.inform(loader);
   
    TokenStream ts = new PorterStemFilter(factory.create(tokenizer));
    assertTokenStreamContents(ts, new String[] { "test", "cat" });
  }
View Full Code Here

    args.put("ignoreCase", "true");
    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
    factory.init(args);
    factory.inform(loader);
   
    TokenStream ts = new PorterStemFilter(factory.create(tokenizer));
    assertTokenStreamContents(ts, new String[] { "test", "cat" });
  }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.en.PorterStemFilter

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.