Examples of org.apache.lucene.analysis.util.TokenFilterFactory

org.apache.lucene.analysis.util.TokenFilterFactory
Abstract parent class for analysis factories that create {@link org.apache.lucene.analysis.TokenFilter}instances.

   * Test secondary strength, for english case is not significant.
   */
  public void testSecondaryStrength() throws Exception {
    String upperCase = "TESTING";
    String lowerCase = "testing";
    TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
        "locale", "en",
        "strength", "secondary",
        "decomposition", "no");
    TokenStream tsUpper = factory.create(
        new KeywordTokenizer(new StringReader(upperCase)));
    TokenStream tsLower = factory.create(
        new KeywordTokenizer(new StringReader(lowerCase)));
    assertCollatesToSame(tsUpper, tsLower);
  }

View Full Code Here

   * to quaternary level 
   */
  public void testIgnorePunctuation() throws Exception {
    String withPunctuation = "foo-bar";
    String withoutPunctuation = "foo bar";
    TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
        "locale", "en",
        "strength", "primary",
        "alternate", "shifted");
    TokenStream tsPunctuation = factory.create(
        new KeywordTokenizer(new StringReader(withPunctuation)));
    TokenStream tsWithoutPunctuation = factory.create(
        new KeywordTokenizer(new StringReader(withoutPunctuation)));
    assertCollatesToSame(tsPunctuation, tsWithoutPunctuation);
  }

View Full Code Here

   */
  public void testIgnoreWhitespace() throws Exception {
    String withSpace = "foo bar";
    String withoutSpace = "foobar";
    String withPunctuation = "foo-bar";
    TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
        "locale", "en",
        "strength", "primary",
        "alternate", "shifted",
        "variableTop", " ");
    TokenStream tsWithSpace = factory.create(
        new KeywordTokenizer(new StringReader(withSpace)));
    TokenStream tsWithoutSpace = factory.create(
        new KeywordTokenizer(new StringReader(withoutSpace)));
    assertCollatesToSame(tsWithSpace, tsWithoutSpace);
    // now assert that punctuation still matters: foo-bar < foo bar
    tsWithSpace = factory.create(
        new KeywordTokenizer(new StringReader(withSpace)));
    TokenStream tsWithPunctuation = factory.create(
        new KeywordTokenizer(new StringReader(withPunctuation)));
    assertCollation(tsWithPunctuation, tsWithSpace, -1);
  }

View Full Code Here

   * foobar-9 sorts before foobar-10
   */
  public void testNumerics() throws Exception {
    String nine = "foobar-9";
    String ten = "foobar-10";
    TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
        "locale", "en",
        "numeric", "true");
    TokenStream tsNine = factory.create(
        new KeywordTokenizer(new StringReader(nine)));
    TokenStream tsTen = factory.create(
        new KeywordTokenizer(new StringReader(ten)));
    assertCollation(tsNine, tsTen, -1);
  }

View Full Code Here

  public void testIgnoreAccentsButNotCase() throws Exception {
    String withAccents = "résumé";
    String withoutAccents = "resume";
    String withAccentsUpperCase = "Résumé";
    String withoutAccentsUpperCase = "Resume";
    TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
        "locale", "en",
        "strength", "primary",
        "caseLevel", "true");
    TokenStream tsWithAccents = factory.create(
        new KeywordTokenizer(new StringReader(withAccents)));
    TokenStream tsWithoutAccents = factory.create(
        new KeywordTokenizer(new StringReader(withoutAccents)));
    assertCollatesToSame(tsWithAccents, tsWithoutAccents);
    
    TokenStream tsWithAccentsUpperCase = factory.create(
        new KeywordTokenizer(new StringReader(withAccentsUpperCase)));
    TokenStream tsWithoutAccentsUpperCase = factory.create(
        new KeywordTokenizer(new StringReader(withoutAccentsUpperCase)));
    assertCollatesToSame(tsWithAccentsUpperCase, tsWithoutAccentsUpperCase);
    
    // now assert that case still matters: resume < Resume
    TokenStream tsLower = factory.create(
        new KeywordTokenizer(new StringReader(withoutAccents)));
    TokenStream tsUpper = factory.create(
        new KeywordTokenizer(new StringReader(withoutAccentsUpperCase)));
    assertCollation(tsLower, tsUpper, -1);
  }

View Full Code Here

   * before lowercase ones.
   */
  public void testUpperCaseFirst() throws Exception {
    String lower = "resume";
    String upper = "Resume";
    TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
        "locale", "en",
        "strength", "tertiary",
        "caseFirst", "upper");
    TokenStream tsLower = factory.create(
        new KeywordTokenizer(new StringReader(lower)));
    TokenStream tsUpper = factory.create(
        new KeywordTokenizer(new StringReader(upper)));
    assertCollation(tsUpper, tsLower, -1);
  }

View Full Code Here

      String previous = args.put(keysAndValues[i], keysAndValues[i+1]);
      assertNull("duplicate values for key: " + keysAndValues[i], previous);
    }
    String previous = args.put("luceneMatchVersion", TEST_VERSION_CURRENT.toString());
    assertNull("duplicate values for key: luceneMatchVersion", previous);
    TokenFilterFactory factory = null;
    try {
      factory = clazz.getConstructor(Map.class).newInstance(args);
    } catch (InvocationTargetException e) {
      // to simplify tests that check for illegal parameters
      if (e.getCause() instanceof IllegalArgumentException) {

View Full Code Here

    }
  }
  
  private void doTestTokenFilter(String tokenfilter) throws IOException {
    Class<? extends TokenFilterFactory> factoryClazz = TokenFilterFactory.lookupClass(tokenfilter);
    TokenFilterFactory factory = (TokenFilterFactory) initialize(factoryClazz);
    if (factory != null) {
      // we managed to fully create an instance. check a few more things:
      
      // if it implements MultiTermAware, sanity check its impl
      if (factory instanceof MultiTermAwareComponent) {

View Full Code Here

    assertTrue("types Size: " + types.size() + " is not: " + 4, types.size() == 4);
    assertTrue("enablePositionIncrements was set to false but not correctly parsed", !factory.isEnablePositionIncrements());
  }


  public void testCreationWithBlackList() throws Exception {
    TokenFilterFactory factory = tokenFilterFactory("Type",
        "types", "stoptypes-1.txt, stoptypes-2.txt",
        "enablePositionIncrements", "true");
    NumericTokenStream input = new NumericTokenStream();
    input.setIntValue(123);
    factory.create(input);
  }

View Full Code Here

    input.setIntValue(123);
    factory.create(input);
  }
  
  public void testCreationWithWhiteList() throws Exception {
    TokenFilterFactory factory = tokenFilterFactory("Type",
        "types", "stoptypes-1.txt, stoptypes-2.txt",
        "enablePositionIncrements", "true",
        "useWhitelist", "true");
    NumericTokenStream input = new NumericTokenStream();
    input.setIntValue(123);
    factory.create(input);
  }

View Full Code Here

0 1 2 3 4 5

TOP

Related Classes of org.apache.lucene.analysis.util.TokenFilterFactory

org.apache.lucene.analysis.core.TestAllAnalyzersHaveFactories

org.apache.lucene.analysis.core.TestFactories

org.apache.lucene.analysis.core.TestTypeTokenFilterFactory

org.apache.lucene.analysis.synonym.TestSynonymFilterFactory

org.apache.lucene.collation.TestCollationKeyFilterFactory

org.apache.lucene.collation.TestICUCollationKeyFilterFactory

org.apache.stanbol.enhancer.engines.entitylinking.labeltokenizer.lucene.LuceneLabelTokenizer

org.apache.stanbol.enhancer.engines.kuromoji.impl.KuromojiNlpEngine

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.