Package org.apache.lucene.analysis.util

Examples of org.apache.lucene.analysis.util.TokenFilterFactory
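The snippets on this page all follow the same pattern: obtain a TokenFilterFactory (directly, or through a test helper such as tokenFilterFactory(...)), configure it with String key/value pairs, and call create(TokenStream) to wrap an existing stream. For orientation, here is a minimal, self-contained sketch of that pattern using the SPI lookup TokenFilterFactory.forName. It assumes a Lucene 4.x-era classpath (matching the KeywordTokenizer(Reader) constructor used in the examples below); the "lowercase" factory name and the luceneMatchVersion value are illustrative choices, not taken from the snippets.

    import java.io.StringReader;
    import java.util.HashMap;
    import java.util.Map;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.KeywordTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.util.TokenFilterFactory;

    public class TokenFilterFactoryUsage {
      public static void main(String[] args) throws Exception {
        // Look up a registered factory by its SPI name and configure it with a String map.
        Map<String,String> config = new HashMap<String,String>();
        config.put("luceneMatchVersion", "4.10.4"); // illustrative; use the release you build against
        TokenFilterFactory factory = TokenFilterFactory.forName("lowercase", config);

        // create() wraps an existing TokenStream; it does not consume it.
        TokenStream stream = factory.create(
            new KeywordTokenizer(new StringReader("SOME TEXT")));

        // Standard consumption loop.
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
          System.out.println(term.toString());
        }
        stream.end();
        stream.close();
      }
    }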


            if(filterConfigLine == null || filterConfigLine.isEmpty() || DEFAULT_CLASS_NAME_CONFIG.equals(filterConfigLine)){
                continue; //ignore null, empty and the default value
            }
            Entry<String,Map<String,String>> filterConfig = parseConfigLine(
                PROPERTY_TOKEN_FILTER_FACTORY, filterConfigLine);
            TokenFilterFactory tff = initAnalyzer(PROPERTY_TOKEN_FILTER_FACTORY,
                filterConfig.getKey(), TokenFilterFactory.class,
                filterConfig.getValue());
            filterFactories.add(tff);
        }
        //init the language configuration


  protected void doTestTokenizerFactoryArguments(final Version ver,
                                                 final Class delegatorClass)
    throws Exception {

    final String clazz = PatternTokenizerFactory.class.getName();
    TokenFilterFactory factory = null;

    // simple arg form
    factory = tokenFilterFactory("Synonym", ver,
        "synonyms", "synonyms.txt",
        "tokenizerFactory", clazz,

    assertTrue("types Size: " + types.size() + " is not: " + 4, types.size() == 4);
    assertTrue("enablePositionIncrements was set to false but not correctly parsed", !factory.isEnablePositionIncrements());
  }

  public void testCreationWithBlackList() throws Exception {
    TokenFilterFactory factory = tokenFilterFactory("Type",
        "types", "stoptypes-1.txt, stoptypes-2.txt",
        "enablePositionIncrements", "false");
    NumericTokenStream input = new NumericTokenStream();
    input.setIntValue(123);
    factory.create(input);
  }

  public void testCreationWithWhiteList() throws Exception {
    TokenFilterFactory factory = tokenFilterFactory("Type",
        "types", "stoptypes-1.txt, stoptypes-2.txt",
        "enablePositionIncrements", "false",
        "useWhitelist", "true");
    NumericTokenStream input = new NumericTokenStream();
    input.setIntValue(123);
    factory.create(input);
  }

   * Then things will sort and match correctly.
   */
  public void testBasicUsage() throws Exception {
    String turkishUpperCase = "I WİLL USE TURKİSH CASING";
    String turkishLowerCase = "ı will use turkish casıng";
    TokenFilterFactory factory = tokenFilterFactory("CollationKey",
        "language", "tr",
        "strength", "primary");
    TokenStream tsUpper = factory.create(
        new MockTokenizer(new StringReader(turkishUpperCase), MockTokenizer.KEYWORD, false));
    TokenStream tsLower = factory.create(
        new MockTokenizer(new StringReader(turkishLowerCase), MockTokenizer.KEYWORD, false));
    assertCollatesToSame(tsUpper, tsLower);
  }
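The test above and the collation tests that follow all rely on an assertCollatesToSame helper that the snippets do not show. A plausible minimal version (an assumption for illustration, not the actual Lucene test helper) lives inside the test class, with java.io.IOException and the JUnit assertions in scope; it consumes both keyword streams and checks that each emits a single, identical term, i.e. that both inputs collate to the same key:

      // Hypothetical stand-in for the assertCollatesToSame helper used by these tests:
      // each stream is expected to emit exactly one token (the collation key), and the
      // two keys must compare equal.
      private static void assertCollatesToSame(TokenStream stream1, TokenStream stream2) throws IOException {
        CharTermAttribute term1 = stream1.addAttribute(CharTermAttribute.class);
        CharTermAttribute term2 = stream2.addAttribute(CharTermAttribute.class);
        stream1.reset();
        stream2.reset();
        assertTrue(stream1.incrementToken());
        assertTrue(stream2.incrementToken());
        assertEquals(term1.toString(), term2.toString());
        assertFalse(stream1.incrementToken());
        assertFalse(stream2.incrementToken());
        stream1.end();
        stream2.end();
        stream1.close();
        stream2.close();
      }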

   * Test usage of the decomposition option for unicode normalization.
   */
  public void testNormalization() throws Exception {
    String turkishUpperCase = "I W\u0049\u0307LL USE TURKİSH CASING";
    String turkishLowerCase = "ı will use turkish casıng";
    TokenFilterFactory factory = tokenFilterFactory("CollationKey",
        "language", "tr",
        "strength", "primary",
        "decomposition", "canonical");
    TokenStream tsUpper = factory.create(
        new MockTokenizer(new StringReader(turkishUpperCase), MockTokenizer.KEYWORD, false));
    TokenStream tsLower = factory.create(
        new MockTokenizer(new StringReader(turkishLowerCase), MockTokenizer.KEYWORD, false));
    assertCollatesToSame(tsUpper, tsLower);
  }

   * This works even with identical strength.
   */
  public void testFullDecomposition() throws Exception {
    String fullWidth = "Ｔｅｓｔｉｎｇ"; // full-width Latin characters
    String halfWidth = "Testing";
    TokenFilterFactory factory = tokenFilterFactory("CollationKey",
        "language", "zh",
        "strength", "identical",
        "decomposition", "full");
    TokenStream tsFull = factory.create(
        new MockTokenizer(new StringReader(fullWidth), MockTokenizer.KEYWORD, false));
    TokenStream tsHalf = factory.create(
        new MockTokenizer(new StringReader(halfWidth), MockTokenizer.KEYWORD, false));
    assertCollatesToSame(tsFull, tsHalf);
  }

   * Test secondary strength, for english case is not significant.
   */
  public void testSecondaryStrength() throws Exception {
    String upperCase = "TESTING";
    String lowerCase = "testing";
    TokenFilterFactory factory = tokenFilterFactory("CollationKey",
        "language", "en",
        "strength", "secondary",
        "decomposition", "no");
    TokenStream tsUpper = factory.create(
        new MockTokenizer(new StringReader(upperCase), MockTokenizer.KEYWORD, false));
    TokenStream tsLower = factory.create(
        new MockTokenizer(new StringReader(lowerCase), MockTokenizer.KEYWORD, false));
    assertCollatesToSame(tsUpper, tsLower);
  }

   * Then things will sort and match correctly.
   */
  public void testBasicUsage() throws Exception {
    String turkishUpperCase = "I WİLL USE TURKİSH CASING";
    String turkishLowerCase = "ı will use turkish casıng";
    TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
        "locale", "tr",
        "strength", "primary");
    TokenStream tsUpper = factory.create(
        new KeywordTokenizer(new StringReader(turkishUpperCase)));
    TokenStream tsLower = factory.create(
        new KeywordTokenizer(new StringReader(turkishLowerCase)));
    assertCollatesToSame(tsUpper, tsLower);
  }

   * Test usage of the decomposition option for unicode normalization.
   */
  public void testNormalization() throws Exception {
    String turkishUpperCase = "I W\u0049\u0307LL USE TURKİSH CASING";
    String turkishLowerCase = "ı will use turkish casıng";
    TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
        "locale", "tr",
        "strength", "primary",
        "decomposition", "canonical");
    TokenStream tsUpper = factory.create(
        new KeywordTokenizer(new StringReader(turkishUpperCase)));
    TokenStream tsLower = factory.create(
        new KeywordTokenizer(new StringReader(turkishLowerCase)));
    assertCollatesToSame(tsUpper, tsLower);
  }
