Package org.apache.lucene.analysis.util

Examples of org.apache.lucene.analysis.util.TokenFilterFactory
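The snippets on this page all follow the same pattern: obtain a TokenFilterFactory (directly, or through a test helper such as tokenFilterFactory(...)), configure it with String key/value pairs, and call create(TokenStream) to wrap an existing stream. For orientation, here is a minimal, self-contained sketch of that pattern using the SPI lookup TokenFilterFactory.forName. It assumes a Lucene 4.x-era classpath (matching the KeywordTokenizer(Reader) constructor used in the examples below); the "lowercase" factory name and the luceneMatchVersion value are illustrative choices, not taken from the snippets.

    import java.io.StringReader;
    import java.util.HashMap;
    import java.util.Map;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.KeywordTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.util.TokenFilterFactory;

    public class TokenFilterFactoryUsage {
      public static void main(String[] args) throws Exception {
        // Look up a registered factory by its SPI name and configure it with a String map.
        Map<String,String> config = new HashMap<String,String>();
        config.put("luceneMatchVersion", "4.10.4"); // illustrative; use the release you build against
        TokenFilterFactory factory = TokenFilterFactory.forName("lowercase", config);

        // create() wraps an existing TokenStream; it does not consume it.
        TokenStream stream = factory.create(
            new KeywordTokenizer(new StringReader("SOME TEXT")));

        // Standard consumption loop.
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
          System.out.println(term.toString());
        }
        stream.end();
        stream.close();
      }
    }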


            if(filterConfigLine == null || filterConfigLine.isEmpty() || DEFAULT_CLASS_NAME_CONFIG.equals(filterConfigLine)){
                continue; //ignore null, empty and the default value
            }
            Entry<String,Map<String,String>> filterConfig = parseConfigLine(
                PROPERTY_TOKEN_FILTER_FACTORY, filterConfigLine);
            TokenFilterFactory tff = initAnalyzer(PROPERTY_TOKEN_FILTER_FACTORY,
                filterConfig.getKey(), TokenFilterFactory.class,
                filterConfig.getValue());
            filterFactories.add(tff);
        }
        //init the language configuration


  protected void doTestTokenizerFactoryArguments(final Version ver,
                                                 final Class delegatorClass)
    throws Exception {

    final String clazz = PatternTokenizerFactory.class.getName();
    TokenFilterFactory factory = null;

    // simple arg form
    factory = tokenFilterFactory("Synonym", ver,
        "synonyms", "synonyms.txt",
        "tokenizerFactory", clazz,

    assertTrue("types Size: " + types.size() + " is not: " + 4, types.size() == 4);
    assertTrue("enablePositionIncrements was set to false but not correctly parsed", !factory.isEnablePositionIncrements());
  }

  public void testCreationWithBlackList() throws Exception {
    TokenFilterFactory factory = tokenFilterFactory("Type",
        "types", "stoptypes-1.txt, stoptypes-2.txt",
        "enablePositionIncrements", "false");
    NumericTokenStream input = new NumericTokenStream();
    input.setIntValue(123);
    factory.create(input);
  }

  public void testCreationWithWhiteList() throws Exception {
    TokenFilterFactory factory = tokenFilterFactory("Type",
        "types", "stoptypes-1.txt, stoptypes-2.txt",
        "enablePositionIncrements", "false",
        "useWhitelist", "true");
    NumericTokenStream input = new NumericTokenStream();
    input.setIntValue(123);
    factory.create(input);
  }

   * Then things will sort and match correctly.
   */
  public void testBasicUsage() throws Exception {
    String turkishUpperCase = "I WİLL USE TURKİSH CASING";
    String turkishLowerCase = "ı will use turkish casıng";
    TokenFilterFactory factory = tokenFilterFactory("CollationKey",
        "language", "tr",
        "strength", "primary");
    TokenStream tsUpper = factory.create(
        new MockTokenizer(new StringReader(turkishUpperCase), MockTokenizer.KEYWORD, false));
    TokenStream tsLower = factory.create(
        new MockTokenizer(new StringReader(turkishLowerCase), MockTokenizer.KEYWORD, false));
    assertCollatesToSame(tsUpper, tsLower);
  }
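The test above and the collation tests that follow all rely on an assertCollatesToSame helper that the snippets do not show. A plausible minimal version (an assumption for illustration, not the actual Lucene test helper) lives inside the test class, with java.io.IOException and the JUnit assertions in scope; it consumes both keyword streams and checks that each emits a single, identical term, i.e. that both inputs collate to the same key:

      // Hypothetical stand-in for the assertCollatesToSame helper used by these tests:
      // each stream is expected to emit exactly one token (the collation key), and the
      // two keys must compare equal.
      private static void assertCollatesToSame(TokenStream stream1, TokenStream stream2) throws IOException {
        CharTermAttribute term1 = stream1.addAttribute(CharTermAttribute.class);
        CharTermAttribute term2 = stream2.addAttribute(CharTermAttribute.class);
        stream1.reset();
        stream2.reset();
        assertTrue(stream1.incrementToken());
        assertTrue(stream2.incrementToken());
        assertEquals(term1.toString(), term2.toString());
        assertFalse(stream1.incrementToken());
        assertFalse(stream2.incrementToken());
        stream1.end();
        stream2.end();
        stream1.close();
        stream2.close();
      }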

   * Test usage of the decomposition option for unicode normalization.
   */
  public void testNormalization() throws Exception {
    String turkishUpperCase = "I W\u0049\u0307LL USE TURKİSH CASING";
    String turkishLowerCase = "ı will use turkish casıng";
    TokenFilterFactory factory = tokenFilterFactory("CollationKey",
        "language", "tr",
        "strength", "primary",
        "decomposition", "canonical");
    TokenStream tsUpper = factory.create(
        new MockTokenizer(new StringReader(turkishUpperCase), MockTokenizer.KEYWORD, false));
    TokenStream tsLower = factory.create(
        new MockTokenizer(new StringReader(turkishLowerCase), MockTokenizer.KEYWORD, false));
    assertCollatesToSame(tsUpper, tsLower);
  }

   * This works even with identical strength.
   */
  public void testFullDecomposition() throws Exception {
    String fullWidth = "Ｔｅｓｔｉｎｇ"; // full-width Latin characters
    String halfWidth = "Testing";
    TokenFilterFactory factory = tokenFilterFactory("CollationKey",
        "language", "zh",
        "strength", "identical",
        "decomposition", "full");
    TokenStream tsFull = factory.create(
        new MockTokenizer(new StringReader(fullWidth), MockTokenizer.KEYWORD, false));
    TokenStream tsHalf = factory.create(
        new MockTokenizer(new StringReader(halfWidth), MockTokenizer.KEYWORD, false));
    assertCollatesToSame(tsFull, tsHalf);
  }

   * Test secondary strength, for english case is not significant.
   */
  public void testSecondaryStrength() throws Exception {
    String upperCase = "TESTING";
    String lowerCase = "testing";
    TokenFilterFactory factory = tokenFilterFactory("CollationKey",
        "language", "en",
        "strength", "secondary",
        "decomposition", "no");
    TokenStream tsUpper = factory.create(
        new MockTokenizer(new StringReader(upperCase), MockTokenizer.KEYWORD, false));
    TokenStream tsLower = factory.create(
        new MockTokenizer(new StringReader(lowerCase), MockTokenizer.KEYWORD, false));
    assertCollatesToSame(tsUpper, tsLower);
  }

   * Then things will sort and match correctly.
   */
  public void testBasicUsage() throws Exception {
    String turkishUpperCase = "I WİLL USE TURKİSH CASING";
    String turkishLowerCase = "ı will use turkish casıng";
    TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
        "locale", "tr",
        "strength", "primary");
    TokenStream tsUpper = factory.create(
        new KeywordTokenizer(new StringReader(turkishUpperCase)));
    TokenStream tsLower = factory.create(
        new KeywordTokenizer(new StringReader(turkishLowerCase)));
    assertCollatesToSame(tsUpper, tsLower);
  }

   * Test usage of the decomposition option for unicode normalization.
   */
  public void testNormalization() throws Exception {
    String turkishUpperCase = "I W\u0049\u0307LL USE TURKİSH CASING";
    String turkishLowerCase = "ı will use turkish casıng";
    TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
        "locale", "tr",
        "strength", "primary",
        "decomposition", "canonical");
    TokenStream tsUpper = factory.create(
        new KeywordTokenizer(new StringReader(turkishUpperCase)));
    TokenStream tsLower = factory.create(
        new KeywordTokenizer(new StringReader(turkishLowerCase)));
    assertCollatesToSame(tsUpper, tsLower);
  }
