final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("あり"), false);
Analyzer a = new Analyzer() {
/**
 * Assembles the analysis chain: a {@link JapaneseTokenizer} reading from {@code reader}, wrapped
 * by a {@link KeywordMarkerFilter} configured with {@code exclusionSet}, which is in turn wrapped
 * by a {@link JapaneseBaseFormFilter}.
 *
 * @param fieldName name of the field being analyzed; not used by this implementation
 * @param reader    character source handed to the tokenizer
 * @return components pairing the tokenizer with the outermost filter of the chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
  final Tokenizer tokenizer =
      new JapaneseTokenizer(reader, null, true, JapaneseTokenizer.DEFAULT_MODE);
  // The keyword marker sits between the tokenizer and the base-form filter,
  // so the base-form filter consumes the marker filter's output.
  final TokenStream baseFormChain =
      new JapaneseBaseFormFilter(new KeywordMarkerFilter(tokenizer, exclusionSet));
  return new TokenStreamComponents(tokenizer, baseFormChain);
}
};
assertAnalyzesTo(a, "それはまだ実験段階にあります",
new String[] { "それ", "は", "まだ", "実験", "段階", "に", "あり", "ます" }