Examples of StandardTokenizer


Examples of org.apache.lucene.analysis.standard.StandardTokenizer

    }
   
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
      streams = new SavedStreams();
      streams.source = new StandardTokenizer(matchVersion, reader);
      streams.result = new StandardFilter(streams.source);
      streams.result = new ThaiWordFilter(streams.result);
      streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
                                      streams.result, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
      setPreviousTokenStream(streams);
View Full Code Here

Examples of org.apache.lucene.analysis.standard.StandardTokenizer

    }
   
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
      streams = new SavedStreams();
      streams.source = new StandardTokenizer(matchVersion, reader);
      streams.result = new StandardFilter(streams.source);
      streams.result = new LowerCaseFilter(streams.result);
      streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
                                      streams.result, stopSet);
      streams.result = new GermanStemFilter(streams.result, exclusionSet);
View Full Code Here

Examples of org.apache.lucene.analysis.standard.StandardTokenizer

    public MultiAnalyzer() {
    }

    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
      result = new TestFilter(result);
      result = new LowerCaseFilter(result);
      return result;
    }
View Full Code Here

Examples of org.apache.lucene.analysis.standard.StandardTokenizer

    public PosIncrementAnalyzer() {
    }

    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
      result = new TestPosIncrementFilter(result);
      result = new LowerCaseFilter(result);
      return result;
    }
View Full Code Here

Examples of org.apache.lucene.analysis.standard.StandardTokenizer

   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}, and
   *         {@link GermanStemFilter}
   */
  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new StandardTokenizer(matchVersion, reader);
    result = new StandardFilter(result);
    result = new LowerCaseFilter(result);
    result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
                            result, stopSet);
    result = new GermanStemFilter(result, exclusionSet);
View Full Code Here

Examples of org.apache.lucene.analysis.standard.StandardTokenizer

*/
public class TestElision extends BaseTokenStreamTestCase {

  public void testElision() throws Exception {
    String test = "Plop, juste pour voir l'embrouille avec O'brian. M'enfin.";
    Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(test));
    CharArraySet articles = new CharArraySet(TEST_VERSION_CURRENT, asSet("l", "M"), false);
    TokenFilter filter = new ElisionFilter(tokenizer, articles);
    List<String> tas = filter(filter);
    assertEquals("embrouille", tas.get(4));
    assertEquals("O'brian", tas.get(6));
View Full Code Here

Examples of org.apache.lucene.analysis.standard.StandardTokenizer

      @Override
      protected TokenStreamComponents createComponents(String field, Reader reader) {
        final CharArraySet keywords = new CharArraySet(version, 1, false);
        keywords.add("liƛcie");

        final Tokenizer src = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
        TokenStream result = new StandardFilter(TEST_VERSION_CURRENT, src);
        result = new SetKeywordMarkerFilter(result, keywords);
        result = new MorfologikFilter(result, TEST_VERSION_CURRENT);

        return new TokenStreamComponents(src, result);
View Full Code Here

Examples of org.apache.lucene.analysis.standard.StandardTokenizer

  private DoubleMetaphone filter = new DoubleMetaphone();
 
  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    final TokenStream result = new PorterStemFilter(new StopFilter(
        true, new StandardTokenizer(Version.LUCENE_CURRENT, reader),
        StandardAnalyzer.STOP_WORDS_SET));
   
    TermAttribute termAtt = (TermAttribute) result
        .addAttribute(TermAttribute.class);
    StringBuilder buf = new StringBuilder();
View Full Code Here

Examples of org.apache.lucene.analysis.standard.StandardTokenizer

public class MyAnalyzer extends Analyzer {
 
  @SuppressWarnings("deprecation")
  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new StandardTokenizer(
        Version.LUCENE_CURRENT, reader);
    result = new LowerCaseFilter(result);
    result = new LengthFilter(result, 3, 50);
    result = new StopFilter(true, result, StandardAnalyzer.STOP_WORDS_SET);
    result = new PorterStemFilter(result);
View Full Code Here

Examples of org.apache.lucene.analysis.standard.StandardTokenizer

 
  private final Pattern alphabets = Pattern.compile("[a-z]+");
 
  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
    result = new StandardFilter(result);
    result = new LowerCaseFilter(result);
    result = new StopFilter(true, result, StandardAnalyzer.STOP_WORDS_SET);
   
    TermAttribute termAtt = (TermAttribute) result.addAttribute(TermAttribute.class);
    StringBuilder buf = new StringBuilder();
    try {
      while (result.incrementToken()) {
        if (termAtt.termLength() < 3) continue;
        String word = new String(termAtt.termBuffer(), 0, termAtt.termLength());
        Matcher m = alphabets.matcher(word);
       
        if (m.matches()) {
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.