Examples of org.apache.lucene.analysis.CharArraySet$UnmodifiableCharArraySet

org.apache.lucene.analysis.CharArraySet
Efficient unmodifiable {@link CharArraySet}. This implementation does not delegate calls to a given {@link CharArraySet} like {@link Collections#unmodifiableSet(java.util.Set)} does. Instead it passes the internal representation of a {@link CharArraySet} to a super constructor and overrides all mutators.

    
    assertAnalyzesToReuse(cz, "Česká Republika", new String[] { "česká" });
  }
  
  public void testWithStemExclusionSet() throws IOException{
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    set.add("hole");
    CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
    assertAnalyzesTo(cz, "hole desek", new String[] {"hole", "desk"});
  }

View Full Code Here

    }
    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      TokenStream ts = a.tokenStream(fieldName,reader);
      return new StopFilter(enablePositionIncrements?TEST_VERSION_CURRENT:Version.LUCENE_24, ts,
          new CharArraySet(TEST_VERSION_CURRENT, Collections.singleton("stop"), true));
    }

View Full Code Here

    a.setStemExclusionTable(new String[] { "quintessência" });
    checkReuse(a, "quintessência", "quintessência"); // excluded words will be completely unchanged.
  }
  
  public void testStemExclusionTableBWCompat() throws IOException {
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    set.add("Brasília");
    BrazilianStemFilter filter = new BrazilianStemFilter(
        new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader("Brasília Brasilia")), set);
    assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
  }

View Full Code Here

        new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader("Brasília Brasilia")), set);
    assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
  }


  public void testWithKeywordAttribute() throws IOException {
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    set.add("Brasília");
    BrazilianStemFilter filter = new BrazilianStemFilter(
        new KeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
            "Brasília Brasilia")), set));
    assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
  }

View Full Code Here

            "Brasília Brasilia")), set));
    assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
  }


  public void testWithKeywordAttributeAndExclusionTable() throws IOException {
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    set.add("Brasília");
    CharArraySet set1 = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    set1.add("Brasilia");
    BrazilianStemFilter filter = new BrazilianStemFilter(
        new KeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
            "Brasília Brasilia")), set), set1);
    assertTokenStreamContents(filter, new String[] { "brasília", "brasilia" });
  }

View Full Code Here

    
    assertAnalyzesTo(a, "градове", new String[] {"град"});
  }
  
  public void testWithStemExclusionSet() throws IOException {
    CharArraySet set = new CharArraySet(Version.LUCENE_31, 1, true);
    set.add("строеве");
    Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
    assertAnalyzesTo(a, "строевете строеве", new String[] { "строй", "строеве" });
  }

View Full Code Here

    assertAnalyzesTo(a, "строя", new String[] {"стр"});
    assertAnalyzesTo(a, "строят", new String[] {"стр"});
  }


  public void testWithKeywordAttribute() throws IOException {
    CharArraySet set = new CharArraySet(Version.LUCENE_31, 1, true);
    set.add("строеве");
    MockTokenizer tokenStream = new MockTokenizer(new StringReader("строевете строеве"), MockTokenizer.WHITESPACE, false);


    BulgarianStemFilter filter = new BulgarianStemFilter(
        new KeywordMarkerFilter(tokenStream, set));
    assertTokenStreamContents(filter, new String[] { "строй", "строеве" });

View Full Code Here


    
  }
  
  public void testExclusionTableViaCtor() throws IOException {
    CharArraySet set = new CharArraySet(Version.LUCENE_30, 1, true);
    set.add("lichamelijk");
    DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
    assertAnalyzesToReuse(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" });
    
    a = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
    assertAnalyzesTo(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" });

View Full Code Here

    fa.setStemExclusionTable(new String[] { "habitable" });
    assertAnalyzesToReuse(fa, "habitable", new String[] { "habitable" });
  }
  
  public void testExclusionTableViaCtor() throws Exception {
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    set.add("habitable");
    FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT,
        CharArraySet.EMPTY_SET, set);
    assertAnalyzesToReuse(fa, "habitable chiste", new String[] { "habitable",
        "chist" });

View Full Code Here

      }
    }  
  }
  
  public void testPositionIncrements() throws Exception {
    final CharArraySet protWords = new CharArraySet(new HashSet<String>(Arrays.asList("NUTCH")), false);
    
    /* analyzer that uses whitespace + wdf */
    Analyzer a = new Analyzer() {
      public TokenStream tokenStream(String field, Reader reader) {
        return new WordDelimiterFilter(

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.lucene.analysis.CharArraySet$UnmodifiableCharArraySet

com.github.le11.nls.lucene.TypeAwareStopFilter

com.github.le11.nls.solr.TypeAwareStopFilterFactory

de.jetwick.solrplugin.TWordDelimiterFilterFactory

edu.wiki.index.WikipediaAnalyzer

org.apache.lucene.analysis.ar.TestArabicAnalyzer

org.apache.lucene.analysis.ar.TestArabicStemFilter

org.apache.lucene.analysis.bg.TestBulgarianAnalyzer

org.apache.lucene.analysis.bg.TestBulgarianStemmer

org.apache.lucene.analysis.br.TestBrazilianStemmer

org.apache.lucene.analysis.ca.TestCatalanAnalyzer

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.