Examples of TokenFilterFactory


Examples of org.apache.lucene.analysis.util.TokenFilterFactory

  /*
   * Then things will sort and match correctly.
   */
  public void testBasicUsage() throws Exception {
    String turkishUpperCase = "I WİLL USE TURKİSH CASING";
    String turkishLowerCase = "ı will use turkish casıng";
    TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
        "locale", "tr",
        "strength", "primary");
    TokenStream tsUpper = factory.create(
        new KeywordTokenizer(new StringReader(turkishUpperCase)));
    TokenStream tsLower = factory.create(
        new KeywordTokenizer(new StringReader(turkishLowerCase)));
    assertCollatesToSame(tsUpper, tsLower);
  }
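The snippets on this page call two assertions that are not shown here. A minimal sketch of what they might look like, loosely modeled on Lucene's CollationTestBase; the method bodies and names below are assumptions, not code from this page, and they presume JUnit asserts plus org.apache.lucene.analysis.TokenStream and org.apache.lucene.analysis.tokenattributes.CharTermAttribute are on the classpath.

  // Assumed helpers living in the same test class as the snippets above.
  private void assertCollation(TokenStream stream1, TokenStream stream2, int comp) throws IOException {
    CharTermAttribute term1 = stream1.addAttribute(CharTermAttribute.class);
    CharTermAttribute term2 = stream2.addAttribute(CharTermAttribute.class);
    stream1.reset();
    stream2.reset();
    assertTrue(stream1.incrementToken());
    assertTrue(stream2.incrementToken());
    // Each stream emits a single token whose text encodes the collation key,
    // so comparing the token texts reproduces the collator's ordering.
    assertEquals(Integer.signum(comp),
        Integer.signum(term1.toString().compareTo(term2.toString())));
    assertFalse(stream1.incrementToken());
    assertFalse(stream2.incrementToken());
    stream1.end();
    stream2.end();
    stream1.close();
    stream2.close();
  }

  private void assertCollatesToSame(TokenStream stream1, TokenStream stream2) throws IOException {
    assertCollation(stream1, stream2, 0);
  }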

Examples of org.apache.lucene.analysis.util.TokenFilterFactory

  /*
   * Test usage of the decomposition option for Unicode normalization.
   */
  public void testNormalization() throws Exception {
    String turkishUpperCase = "I W\u0049\u0307LL USE TURKİSH CASING";
    String turkishLowerCase = "ı will use turkish casıng";
    TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
        "locale", "tr",
        "strength", "primary",
        "decomposition", "canonical");
    TokenStream tsUpper = factory.create(
        new KeywordTokenizer(new StringReader(turkishUpperCase)));
    TokenStream tsLower = factory.create(
        new KeywordTokenizer(new StringReader(turkishLowerCase)));
    assertCollatesToSame(tsUpper, tsLower);
  }
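For reference, the locale, strength, and decomposition options correspond to plain ICU4J collator settings. A standalone sketch of that mapping (an assumption written against the public com.ibm.icu.text.Collator API, not the factory source; the class name is a placeholder):

import com.ibm.icu.text.Collator;
import com.ibm.icu.util.ULocale;

public class TurkishCollatorSketch {
  public static void main(String[] args) {
    Collator collator = Collator.getInstance(new ULocale("tr"));
    collator.setStrength(Collator.PRIMARY);                      // case and accent differences ignored
    collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); // normalize before comparing
    // "I" + combining dot above (U+0307) and precomposed "İ" (U+0130) compare equal:
    System.out.println(collator.compare("W\u0049\u0307LL", "W\u0130LL")); // prints 0
  }
}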

Examples of org.apache.lucene.analysis.util.TokenFilterFactory

  /*
   * Test secondary strength; for English, case is not significant.
   */
  public void testSecondaryStrength() throws Exception {
    String upperCase = "TESTING";
    String lowerCase = "testing";
    TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
        "locale", "en",
        "strength", "secondary",
        "decomposition", "no");
    TokenStream tsUpper = factory.create(
        new KeywordTokenizer(new StringReader(upperCase)));
    TokenStream tsLower = factory.create(
        new KeywordTokenizer(new StringReader(lowerCase)));
    assertCollatesToSame(tsUpper, tsLower);
  }

Examples of org.apache.lucene.analysis.util.TokenFilterFactory

  /*
   * Test alternate=shifted: punctuation is shifted
   * to quaternary level
   */
  public void testIgnorePunctuation() throws Exception {
    String withPunctuation = "foo-bar";
    String withoutPunctuation = "foo bar";
    TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
        "locale", "en",
        "strength", "primary",
        "alternate", "shifted");
    TokenStream tsPunctuation = factory.create(
        new KeywordTokenizer(new StringReader(withPunctuation)));
    TokenStream tsWithoutPunctuation = factory.create(
        new KeywordTokenizer(new StringReader(withoutPunctuation)));
    assertCollatesToSame(tsPunctuation, tsWithoutPunctuation);
  }

Examples of org.apache.lucene.analysis.util.TokenFilterFactory

  /*
   * Test alternate=shifted with variableTop=" ": whitespace is ignored, punctuation is not.
   */
  public void testIgnoreWhitespace() throws Exception {
    String withSpace = "foo bar";
    String withoutSpace = "foobar";
    String withPunctuation = "foo-bar";
    TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
        "locale", "en",
        "strength", "primary",
        "alternate", "shifted",
        "variableTop", " ");
    TokenStream tsWithSpace = factory.create(
        new KeywordTokenizer(new StringReader(withSpace)));
    TokenStream tsWithoutSpace = factory.create(
        new KeywordTokenizer(new StringReader(withoutSpace)));
    assertCollatesToSame(tsWithSpace, tsWithoutSpace);
    // now assert that punctuation still matters: foo-bar < foo bar
    tsWithSpace = factory.create(
        new KeywordTokenizer(new StringReader(withSpace)));
    TokenStream tsWithPunctuation = factory.create(
        new KeywordTokenizer(new StringReader(withPunctuation)));
    assertCollation(tsWithPunctuation, tsWithSpace, -1);
  }
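The alternate=shifted and variableTop options in the two snippets above map onto RuleBasedCollator settings. A rough standalone sketch of both behaviors (the mapping is an assumption based on the ICU4J API; setVariableTop is deprecated in recent ICU releases, and the class name is a placeholder):

import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.util.ULocale;

public class ShiftedCollatorSketch {
  public static void main(String[] args) {
    RuleBasedCollator collator = (RuleBasedCollator) Collator.getInstance(new ULocale("en"));
    collator.setStrength(Collator.PRIMARY);
    collator.setAlternateHandlingShifted(true);   // variable characters (punctuation, space) ignored
    System.out.println(collator.compare("foo-bar", "foo bar")); // prints 0

    collator.setVariableTop(" ");                 // only characters up to and including space stay variable
    System.out.println(collator.compare("foo bar", "foobar"));  // prints 0: whitespace still ignored
    System.out.println(collator.compare("foo-bar", "foo bar")); // negative: punctuation matters again
  }
}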

Examples of org.apache.lucene.analysis.util.TokenFilterFactory

  /*
   * foobar-9 sorts before foobar-10
   */
  public void testNumerics() throws Exception {
    String nine = "foobar-9";
    String ten = "foobar-10";
    TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
        "locale", "en",
        "numeric", "true");
    TokenStream tsNine = factory.create(
        new KeywordTokenizer(new StringReader(nine)));
    TokenStream tsTen = factory.create(
        new KeywordTokenizer(new StringReader(ten)));
    assertCollation(tsNine, tsTen, -1);
  }
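The numeric=true option corresponds to ICU's numeric collation; a minimal sketch of the assumed mapping (placeholder class name):

import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.util.ULocale;

public class NumericCollatorSketch {
  public static void main(String[] args) {
    RuleBasedCollator collator = (RuleBasedCollator) Collator.getInstance(new ULocale("en"));
    collator.setNumericCollation(true);          // compare digit runs by numeric value
    System.out.println(collator.compare("foobar-9", "foobar-10") < 0); // prints true
  }
}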

Examples of org.apache.lucene.analysis.util.TokenFilterFactory

  public void testIgnoreAccentsButNotCase() throws Exception {
    String withAccents = "résumé";
    String withoutAccents = "resume";
    String withAccentsUpperCase = "Résumé";
    String withoutAccentsUpperCase = "Resume";
    TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
        "locale", "en",
        "strength", "primary",
        "caseLevel", "true");
    TokenStream tsWithAccents = factory.create(
        new KeywordTokenizer(new StringReader(withAccents)));
    TokenStream tsWithoutAccents = factory.create(
        new KeywordTokenizer(new StringReader(withoutAccents)));
    assertCollatesToSame(tsWithAccents, tsWithoutAccents);
   
    TokenStream tsWithAccentsUpperCase = factory.create(
        new KeywordTokenizer(new StringReader(withAccentsUpperCase)));
    TokenStream tsWithoutAccentsUpperCase = factory.create(
        new KeywordTokenizer(new StringReader(withoutAccentsUpperCase)));
    assertCollatesToSame(tsWithAccentsUpperCase, tsWithoutAccentsUpperCase);
   
    // now assert that case still matters: resume < Resume
    TokenStream tsLower = factory.create(
        new KeywordTokenizer(new StringReader(withoutAccents)));
    TokenStream tsUpper = factory.create(
        new KeywordTokenizer(new StringReader(withoutAccentsUpperCase)));
    assertCollation(tsLower, tsUpper, -1);
  }

Examples of org.apache.lucene.analysis.util.TokenFilterFactory

  /*
   * Test caseFirst="upper": uppercase terms sort
   * before lowercase ones.
   */
  public void testUpperCaseFirst() throws Exception {
    String lower = "resume";
    String upper = "Resume";
    TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
        "locale", "en",
        "strength", "tertiary",
        "caseFirst", "upper");
    TokenStream tsLower = factory.create(
        new KeywordTokenizer(new StringReader(lower)));
    TokenStream tsUpper = factory.create(
        new KeywordTokenizer(new StringReader(upper)));
    assertCollation(tsUpper, tsLower, -1);
  }
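Likewise, the caseLevel and caseFirst options from the last two snippets correspond to RuleBasedCollator switches. A combined sketch of the assumed mapping (placeholder class name):

import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.util.ULocale;

public class CaseHandlingCollatorSketch {
  public static void main(String[] args) {
    // caseLevel=true with primary strength: accents ignored, case still significant
    RuleBasedCollator caseLevel = (RuleBasedCollator) Collator.getInstance(new ULocale("en"));
    caseLevel.setStrength(Collator.PRIMARY);
    caseLevel.setCaseLevel(true);
    System.out.println(caseLevel.compare("résumé", "resume"));     // prints 0
    System.out.println(caseLevel.compare("resume", "Resume") < 0); // prints true

    // caseFirst=upper with tertiary strength: uppercase sorts before lowercase
    RuleBasedCollator upperFirst = (RuleBasedCollator) Collator.getInstance(new ULocale("en"));
    upperFirst.setStrength(Collator.TERTIARY);
    upperFirst.setUpperCaseFirst(true);
    System.out.println(upperFirst.compare("Resume", "resume") < 0); // prints true
  }
}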

Examples of org.apache.lucene.analysis.util.TokenFilterFactory

      String previous = args.put(keysAndValues[i], keysAndValues[i+1]);
      assertNull("duplicate values for key: " + keysAndValues[i], previous);
    }
    String previous = args.put("luceneMatchVersion", TEST_VERSION_CURRENT.toString());
    assertNull("duplicate values for key: luceneMatchVersion", previous);
    TokenFilterFactory factory = null;
    try {
      factory = clazz.getConstructor(Map.class).newInstance(args);
    } catch (InvocationTargetException e) {
      // to simplify tests that check for illegal parameters
      if (e.getCause() instanceof IllegalArgumentException) {
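The snippet above is cut off mid-method. A hedged sketch of how such a reflection-based tokenFilterFactory helper could be completed, loosely modeled on Lucene's test base classes; the lookup call, error handling, and use of TEST_VERSION_CURRENT are assumptions, and it presumes java.util.Map, java.util.HashMap, java.lang.reflect.InvocationTargetException, and JUnit asserts are in scope.

  protected TokenFilterFactory tokenFilterFactory(String name, String... keysAndValues) throws Exception {
    Class<? extends TokenFilterFactory> clazz = TokenFilterFactory.lookupClass(name);
    if (keysAndValues.length % 2 == 1) {
      throw new IllegalArgumentException("invalid keysAndValues map");
    }
    Map<String, String> args = new HashMap<>();
    for (int i = 0; i < keysAndValues.length; i += 2) {
      String previous = args.put(keysAndValues[i], keysAndValues[i + 1]);
      assertNull("duplicate values for key: " + keysAndValues[i], previous);
    }
    String previous = args.put("luceneMatchVersion", TEST_VERSION_CURRENT.toString());
    assertNull("duplicate values for key: luceneMatchVersion", previous);
    TokenFilterFactory factory;
    try {
      factory = clazz.getConstructor(Map.class).newInstance(args);
    } catch (InvocationTargetException e) {
      // unwrap so tests that expect IllegalArgumentException for bad parameters see it directly
      if (e.getCause() instanceof IllegalArgumentException) {
        throw (IllegalArgumentException) e.getCause();
      }
      throw e;
    }
    return factory;
  }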

Examples of org.apache.lucene.analysis.util.TokenFilterFactory

  /*
   * Then things will sort and match correctly.
   */
  public void testBasicUsage() throws Exception {
    String turkishUpperCase = "I WİLL USE TURKİSH CASING";
    String turkishLowerCase = "ı will use turkish casıng";
    TokenFilterFactory factory = tokenFilterFactory("CollationKey",
        "language", "tr",
        "strength", "primary");
    TokenStream tsUpper = factory.create(
        new MockTokenizer(new StringReader(turkishUpperCase), MockTokenizer.KEYWORD, false));
    TokenStream tsLower = factory.create(
        new MockTokenizer(new StringReader(turkishLowerCase), MockTokenizer.KEYWORD, false));
    assertCollatesToSame(tsUpper, tsLower);
  }
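The final snippet uses the non-ICU CollationKey factory, which is backed by the JDK's java.text.Collator. Roughly equivalent settings, sketched as an assumed mapping (placeholder class name):

import java.text.Collator;
import java.util.Locale;

public class JdkTurkishCollatorSketch {
  public static void main(String[] args) {
    Collator collator = Collator.getInstance(new Locale("tr"));
    collator.setStrength(Collator.PRIMARY);   // case differences ignored
    System.out.println(collator.compare("I WİLL USE TURKİSH CASING",
                                        "ı will use turkish casıng")); // expected: 0
  }
}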