Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.PorterStemFilter


    TokenStream result = new SentenceTokenizer(reader);
    result = new WordTokenizer(result, wordSegment);
    // result = new LowerCaseFilter(result);
    // 不再需要LowerCaseFilter,因为SegTokenFilter已经将所有英文字符转换成小写
    // stem太严格了, This is not bug, this feature:)
    result = new PorterStemFilter(result);
    if (stopWords != null) {
      result = new StopFilter(result, stopWords, false);
    }
    return result;
  }
View Full Code Here


      throw new IllegalArgumentException("child analyzer must not be null");
 
    return new Analyzer() {
      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new PorterStemFilter(
            child.tokenStream(fieldName, reader));
//        /* PorterStemFilter and SnowballFilter have the same behaviour,
//        but PorterStemFilter is much faster. */
//        return new org.apache.lucene.analysis.snowball.SnowballFilter(
//            child.tokenStream(fieldName, reader), "English");
View Full Code Here

public class TwitterAnalyzer extends Analyzer {
  private DoubleMetaphone filter = new DoubleMetaphone();
 
  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    final TokenStream result = new PorterStemFilter(new StopFilter(
        true, new StandardTokenizer(Version.LUCENE_CURRENT, reader),
        StandardAnalyzer.STOP_WORDS_SET));
   
    TermAttribute termAtt = (TermAttribute) result
        .addAttribute(TermAttribute.class);
    StringBuilder buf = new StringBuilder();
    try {
      while (result.incrementToken()) {
        String word = new String(termAtt.termBuffer(), 0, termAtt
            .termLength());
        buf.append(filter.encode(word)).append(" ");
       
      }
View Full Code Here

    TokenStream result = new StandardTokenizer(
        Version.LUCENE_CURRENT, reader);
    result = new LowerCaseFilter(result);
    result = new LengthFilter(result, 3, 50);
    result = new StopFilter(true, result, StandardAnalyzer.STOP_WORDS_SET);
    result = new PorterStemFilter(result);
    return result;
  }
View Full Code Here

* &lt;/fieldType&gt;</pre>
* @version $Id: PorterStemFilterFactory.java 1074243 2011-02-24 18:07:16Z rmuir $
*/
public class PorterStemFilterFactory extends BaseTokenFilterFactory {
  public PorterStemFilter create(TokenStream input) {
    return new PorterStemFilter(input);
  }
View Full Code Here

    TokenStream result = new SentenceTokenizer(reader);
    result = new WordTokenFilter(result);
    // result = new LowerCaseFilter(result);
    // LowerCaseFilter is not needed, as SegTokenFilter lowercases Basic Latin text.
    // The porter stemming is too strict, this is not a bug, this is a feature:)
    result = new PorterStemFilter(result);
    if (stopWords != null) {
      result = new StopFilter(result, stopWords, false);
    }
    return result;
  }
View Full Code Here

    if (streams == null) {
      streams = new SavedStreams();
      setPreviousTokenStream(streams);
      streams.tokenStream = new SentenceTokenizer(reader);
      streams.filteredTokenStream = new WordTokenFilter(streams.tokenStream);
      streams.filteredTokenStream = new PorterStemFilter(streams.filteredTokenStream);
      if (stopWords != null) {
        streams.filteredTokenStream = new StopFilter(streams.filteredTokenStream, stopWords, false);
      }
    } else {
      streams.tokenStream.reset(reader);
View Full Code Here

    TokenStream result = new SentenceTokenizer(reader);
    result = new WordTokenFilter(result);
    // result = new LowerCaseFilter(result);
    // LowerCaseFilter is not needed, as SegTokenFilter lowercases Basic Latin text.
    // The porter stemming is too strict, this is not a bug, this is a feature:)
    result = new PorterStemFilter(result);
    if (!stopWords.isEmpty()) {
      result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
                              result, stopWords, false);
    }
    return result;
View Full Code Here

    if (streams == null) {
      streams = new SavedStreams();
      setPreviousTokenStream(streams);
      streams.tokenStream = new SentenceTokenizer(reader);
      streams.filteredTokenStream = new WordTokenFilter(streams.tokenStream);
      streams.filteredTokenStream = new PorterStemFilter(streams.filteredTokenStream);
      if (!stopWords.isEmpty()) {
        streams.filteredTokenStream = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
                                                     streams.filteredTokenStream, stopWords, false);
      }
    } else {
View Full Code Here

    ResourceLoader loader = new SolrResourceLoader(null, null);
    args.put("dictionary", "stemdict.txt");
    factory.init(args);
    factory.inform(loader);
   
    TokenStream ts = new PorterStemFilter(factory.create(tokenizer));
    assertTokenStreamContents(ts, new String[] { "test", "cat" });
  }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.PorterStemFilter

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.