Package search

Source Code of search.SourceCodeAnalyzer$CodeTokenizer

package search;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

import java.io.Reader;
import java.io.IOException;

/** An Analyzer that filters tokenizes sequences matching Character.isJavaIdentifierPart() and then toLowers them. */
public final class SourceCodeAnalyzer extends Analyzer
{

    private static final String[] STOP_WORDS = {
      // Java
        "public","private","protected","interface",
        "abstract","implements","extends","null", "new",
        "switch","case", "default" ,"synchronized" ,
        "do", "if", "else", "break","continue","this",
        "assert" ,"for","instanceof", "transient",
        "final", "static" ,"void","catch","try",
        "throws","throw","class", "finally","return",
        "const" , "native", "super","while", "import",
        "package" ,"true", "false",
      // English
        "a", "an", "and", "are","as","at","be", "but",
        "by", "for", "if", "in", "into", "is", "it",
        "no", "not", "of", "on", "or", "s", "such",
        "that", "the", "their", "then", "there","these",
        "they", "this", "to", "was", "will", "with"
    };

    static PerFieldAnalyzerWrapper analyzerForField (String fieldName, Analyzer defaultAnalyzer)
    {
        if (defaultAnalyzer == null) defaultAnalyzer = new StandardAnalyzer();
        PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer);
        analyzer.addAnalyzer(fieldName, new SourceCodeAnalyzer());
        return analyzer;
    }


    public TokenStream tokenStream (String fieldName, Reader reader)
    {
        // return new StopFilter(new CodeTokenizer(reader), STOP_WORDS);
        return new CodeTokenizer(reader);
    }

    static class CodeTokenizer extends CharTokenizer
    {
        public CodeTokenizer (Reader in)
        {
            super(in);
        }

        protected boolean isTokenChar (char c)
        {
            return Character.isJavaIdentifierPart(c);
        }

        protected char normalize (char c)
        {
            return Character.toLowerCase(c);
        }
    }
}
TOP

Related Classes of search.SourceCodeAnalyzer$CodeTokenizer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.