Package com.gentics.cr.lucene.autocomplete

Source Code of com.gentics.cr.lucene.autocomplete.AutocompleteAnalyzer

package com.gentics.cr.lucene.autocomplete;

import java.io.Reader;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.lucene.analysis.ASCIIFoldingFilter;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter.Side;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;

import com.gentics.cr.lucene.LuceneVersion;

public class AutocompleteAnalyzer extends Analyzer {

  private static final String[] ENGLISH_STOP_WORDS = { "a", "an", "and", "are", "as", "at", "be", "but", "by", "for",
      "i", "if", "in", "into", "is", "no", "not", "of", "on", "or", "s", "such", "t", "that", "the", "their",
      "then", "there", "these", "they", "this", "to", "was", "will", "with" };

  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new StandardTokenizer(LuceneVersion.getVersion(), reader);
    result = new StandardFilter(result);
    result = new LowerCaseFilter(result);
    result = new ASCIIFoldingFilter(result);
    List<String> list = Arrays.asList(ENGLISH_STOP_WORDS);
    Set<String> set = new HashSet<String>(list);
    result = new StopFilter(false, result, set, true);
    result = new EdgeNGramTokenFilter(result, Side.FRONT, 1, 20);
    return result;
  }

}
TOP

Related Classes of com.gentics.cr.lucene.autocomplete.AutocompleteAnalyzer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.