Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.LowerCaseFilter


    warnDeprecated("Use LowerCaseFilterFactory instead");
  }

  public TokenFilter create(TokenStream in) {
    // hardcode the version to give exactly the old behavior
    return new LowerCaseFilter(Version.LUCENE_29, in);
  }
View Full Code Here


   
    Analyzer analyzer = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_31, reader) : factory.create(reader);
        TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_31, tokenizer) : tokenizer;
        return new TokenStreamComponents(tokenizer, stream);
      }
    };

    String format = args.get("format");
View Full Code Here

      this.maxSynonyms = maxSynonyms;
    }
   
    public TokenStream tokenStream(String fieldName, Reader reader) {
      TokenStream ts = new WhitespaceTokenizer(reader);
      ts = new LowerCaseFilter(ts);
      ts = new SynonymTokenFilter(ts, synonyms, maxSynonyms);
      return ts;
    }
View Full Code Here

        throws IOException {
      SavedStreams streams = (SavedStreams) getPreviousTokenStream();
      if (streams == null) {
        streams = new SavedStreams();
        streams.source = new WhitespaceTokenizer(reader);
        streams.result = new LowerCaseFilter(streams.source);
        streams.result = new SynonymTokenFilter(streams.result, synonyms, maxSynonyms);
        setPreviousTokenStream(streams);
      } else {
        streams.source.reset(reader);
        streams.result.reset(); // reset the SynonymTokenFilter
View Full Code Here

 
  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new WikipediaTokenizer(reader);
    result = new StandardFilter(result);
    result = new LowerCaseFilter(result);
    result = new StopFilter(true, result, stopSet);
    return result;
  }
View Full Code Here

    this.tokenStream = new DummyTokenStream("dummy", 1, 1, 0);
  }

  @Test
  public void testCreateTokenFilter() throws IOException {
    LowerCaseFilter lowercaseFilter = (LowerCaseFilter) lowercaseFilterFactory.createTokenFilter(tokenStream, null);
    assertNotNull(lowercaseFilter);
  }
View Full Code Here

  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new StandardTokenizer(LuceneVersion.getVersion(), reader);
    result = new StandardFilter(result);
    result = new LowerCaseFilter(result);
    result = new ASCIIFoldingFilter(result);
    List<String> list = Arrays.asList(ENGLISH_STOP_WORDS);
    Set<String> set = new HashSet<String>(list);
    result = new StopFilter(false, result, set, true);
    result = new EdgeNGramTokenFilter(result, Side.FRONT, 1, 20);
View Full Code Here

   *       StandardFilter, LowerCaseFilter, and StopFilter
   */
  public final TokenStream tokenStream( String fieldName, Reader reader ) {
    TokenStream result = new StandardTokenizer( reader );
    result = new StandardFilter( result );
    result = new LowerCaseFilter( result );
    result = new StopFilter( result, stoptable );
    return result;
  }
View Full Code Here

    TokenStream result = new StandardTokenizer(reader);
    result = new StandardFilter(result);
    result = new StopFilter(result, stoptable);
    result = new FrenchStemFilter(result, excltable);
    // Convert to lowercase after stemming!
    result = new LowerCaseFilter(result);
    return result;
  }
View Full Code Here

   *         {@link ArabicNormalizationFilter},
   *         {@link PersianNormalizationFilter} and Persian Stop words
   */
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new ArabicLetterTokenizer(reader);
    result = new LowerCaseFilter(result);
    result = new ArabicNormalizationFilter(result);
    /* additional persian-specific normalization */
    result = new PersianNormalizationFilter(result);
    /*
     * the order here is important: the stopword list is normalized with the
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.LowerCaseFilter

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.