Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.CharArraySet$UnmodifiableCharArraySet


   
    assertAnalyzesToReuse(cz, "Česká Republika", new String[] { "česká" });
  }
 
  public void testWithStemExclusionSet() throws IOException{
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    set.add("hole");
    CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
    assertAnalyzesTo(cz, "hole desek", new String[] {"hole", "desk"});
  }
View Full Code Here


    }
    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      TokenStream ts = a.tokenStream(fieldName,reader);
      return new StopFilter(enablePositionIncrements?TEST_VERSION_CURRENT:Version.LUCENE_24, ts,
          new CharArraySet(TEST_VERSION_CURRENT, Collections.singleton("stop"), true));
    }
View Full Code Here

    a.setStemExclusionTable(new String[] { "quintessência" });
    checkReuse(a, "quintessência", "quintessência"); // excluded words will be completely unchanged.
  }
 
  public void testStemExclusionTableBWCompat() throws IOException {
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    set.add("Brasília");
    BrazilianStemFilter filter = new BrazilianStemFilter(
        new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader("Brasília Brasilia")), set);
    assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
  }
View Full Code Here

        new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader("Brasília Brasilia")), set);
    assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
  }

  public void testWithKeywordAttribute() throws IOException {
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    set.add("Brasília");
    BrazilianStemFilter filter = new BrazilianStemFilter(
        new KeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
            "Brasília Brasilia")), set));
    assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
  }
View Full Code Here

            "Brasília Brasilia")), set));
    assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
  }

  public void testWithKeywordAttributeAndExclusionTable() throws IOException {
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    set.add("Brasília");
    CharArraySet set1 = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    set1.add("Brasilia");
    BrazilianStemFilter filter = new BrazilianStemFilter(
        new KeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
            "Brasília Brasilia")), set), set1);
    assertTokenStreamContents(filter, new String[] { "brasília", "brasilia" });
  }
View Full Code Here

   
    assertAnalyzesTo(a, "градове", new String[] {"град"});
  }
 
  public void testWithStemExclusionSet() throws IOException {
    CharArraySet set = new CharArraySet(Version.LUCENE_31, 1, true);
    set.add("строеве");
    Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
    assertAnalyzesTo(a, "строевете строеве", new String[] { "строй", "строеве" });
  }
View Full Code Here

    assertAnalyzesTo(a, "строя", new String[] {"стр"});
    assertAnalyzesTo(a, "строят", new String[] {"стр"});
  }

  public void testWithKeywordAttribute() throws IOException {
    CharArraySet set = new CharArraySet(Version.LUCENE_31, 1, true);
    set.add("строеве");
    MockTokenizer tokenStream = new MockTokenizer(new StringReader("строевете строеве"), MockTokenizer.WHITESPACE, false);

    BulgarianStemFilter filter = new BulgarianStemFilter(
        new KeywordMarkerFilter(tokenStream, set));
    assertTokenStreamContents(filter, new String[] { "строй", "строеве" });
View Full Code Here

   
  }
 
  public void testExclusionTableViaCtor() throws IOException {
    CharArraySet set = new CharArraySet(Version.LUCENE_30, 1, true);
    set.add("lichamelijk");
    DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
    assertAnalyzesToReuse(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" });
   
    a = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
    assertAnalyzesTo(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" });
View Full Code Here

    fa.setStemExclusionTable(new String[] { "habitable" });
    assertAnalyzesToReuse(fa, "habitable", new String[] { "habitable" });
  }
 
  public void testExclusionTableViaCtor() throws Exception {
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    set.add("habitable");
    FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT,
        CharArraySet.EMPTY_SET, set);
    assertAnalyzesToReuse(fa, "habitable chiste", new String[] { "habitable",
        "chist" });
View Full Code Here

      }
    } 
  }
 
  public void testPositionIncrements() throws Exception {
    final CharArraySet protWords = new CharArraySet(new HashSet<String>(Arrays.asList("NUTCH")), false);
   
    /* analyzer that uses whitespace + wdf */
    Analyzer a = new Analyzer() {
      public TokenStream tokenStream(String field, Reader reader) {
        return new WordDelimiterFilter(
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.CharArraySet$UnmodifiableCharArraySet

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.