Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.TokenStream


  public void assertExpectedHighlightCount(final int maxNumFragmentsRequired,
      final int expectedHighlights) throws Exception {
    for (int i = 0; i < hits.totalHits; i++) {
      String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
      QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
      Highlighter highlighter = new Highlighter(this, scorer);

      highlighter.setTextFragmenter(new SimpleFragmenter(40));
View Full Code Here


     *   and {@link RussianStemFilter}
     */
    @Override
    public TokenStream tokenStream(String fieldName, Reader reader)
    {
        TokenStream result = new RussianLetterTokenizer(reader);
        result = new LowerCaseFilter(result);
        result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
                                result, stopSet);
        result = new RussianStemFilter(result);
        return result;
View Full Code Here

   *         filtered with {@link StandardFilter}, {@link StopFilter},
   *         {@link FrenchStemFilter} and {@link LowerCaseFilter}
   */
  @Override
  public final TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new StandardTokenizer(matchVersion, reader);
    result = new StandardFilter(result);
    result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
                            result, stoptable);
    result = new FrenchStemFilter(result, excltable);
    // Convert to lowercase after stemming!
View Full Code Here

    this.outputUnigrams = outputUnigrams;
  }

  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream wrapped;
    try {
      wrapped = defaultAnalyzer.reusableTokenStream(fieldName, reader);
    } catch (IOException e) {
      wrapped = defaultAnalyzer.tokenStream(fieldName, reader);
    }
View Full Code Here

      streams = new SavedStreams();
      streams.wrapped = defaultAnalyzer.reusableTokenStream(fieldName, reader);
      streams.shingle = new ShingleFilter(streams.wrapped);
      setPreviousTokenStream(streams);
    } else {
      TokenStream result = defaultAnalyzer.reusableTokenStream(fieldName, reader);
      if (result == streams.wrapped) {
        /* the wrapped analyzer reused the stream */
        streams.shingle.reset();
      } else {
        /* the wrapped analyzer did not, create a new shingle around the new one */
 
View Full Code Here

   *         {@link ArabicNormalizationFilter},
   *         {@link PersianNormalizationFilter} and Persian Stop words
   */
  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new ArabicLetterTokenizer(reader);
    result = new LowerCaseFilter(result);
    result = new ArabicNormalizationFilter(result);
    /* additional persian-specific normalization */
    result = new PersianNormalizationFilter(result);
    /*
 
View Full Code Here

   *   filtered with {@link StandardFilter}, {@link StopFilter},
   *   and {@link DutchStemFilter}
   */
  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new StandardTokenizer(matchVersion, reader);
    result = new StandardFilter(result);
    result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
                            result, stoptable);
    result = new DutchStemFilter(result, excltable, stemdict);
    return result;
View Full Code Here

  }
 
  public void testTokenStream() throws Exception {
    QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, new WhitespaceAnalyzer());
    a.addStopWords(reader, 10);
    TokenStream ts = a.tokenStream("repetitiveField", new StringReader("this boring"));
    TermAttribute termAtt = ts.getAttribute(TermAttribute.class);
    assertTrue(ts.incrementToken());
    assertEquals("this", termAtt.term());
    assertFalse(ts.incrementToken());
  }
View Full Code Here

    * @return  A {@link TokenStream} built from a {@link ChineseTokenizer}
    *   filtered with {@link ChineseFilter}.
    */
    @Override
    public final TokenStream tokenStream(String fieldName, Reader reader) {
        TokenStream result = new ChineseTokenizer(reader);
        result = new ChineseFilter(result);
        return result;
    }
View Full Code Here

    this.collator = collator;
  }

  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new KeywordTokenizer(reader);
    result = new ICUCollationKeyFilter(result, collator);
    return result;
  }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.TokenStream

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Corporation. Contact coftware#gmail.com.