Package org.apache.lucene.analysis.core

Examples of org.apache.lucene.analysis.core.KeywordTokenizer


 
  public void testEmptyTerm() throws IOException {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new FrenchLightStemFilter(tokenizer));
      }
    };
    checkOneTermReuse(a, "", "");
  }
View Full Code Here


    args.put("locale", "tr");
    args.put("strength", "primary");
    factory.init(args);
    factory.inform(new StringMockResourceLoader(""));
    TokenStream tsUpper = factory.create(
        new KeywordTokenizer(new StringReader(turkishUpperCase)));
    TokenStream tsLower = factory.create(
        new KeywordTokenizer(new StringReader(turkishLowerCase)));
    assertCollatesToSame(tsUpper, tsLower);
  }
View Full Code Here

    args.put("strength", "primary");
    args.put("decomposition", "canonical");
    factory.init(args);
    factory.inform(new StringMockResourceLoader(""));
    TokenStream tsUpper = factory.create(
        new KeywordTokenizer(new StringReader(turkishUpperCase)));
    TokenStream tsLower = factory.create(
        new KeywordTokenizer(new StringReader(turkishLowerCase)));
    assertCollatesToSame(tsUpper, tsLower);
  }
View Full Code Here

    args.put("strength", "secondary");
    args.put("decomposition", "no");
    factory.init(args);
    factory.inform(new StringMockResourceLoader(""));
    TokenStream tsUpper = factory.create(
        new KeywordTokenizer(new StringReader(upperCase)));
    TokenStream tsLower = factory.create(
        new KeywordTokenizer(new StringReader(lowerCase)));
    assertCollatesToSame(tsUpper, tsLower);
  }
View Full Code Here

    args.put("strength", "primary");
    args.put("alternate", "shifted");
    factory.init(args);
    factory.inform(new StringMockResourceLoader(""));
    TokenStream tsPunctuation = factory.create(
        new KeywordTokenizer(new StringReader(withPunctuation)));
    TokenStream tsWithoutPunctuation = factory.create(
        new KeywordTokenizer(new StringReader(withoutPunctuation)));
    assertCollatesToSame(tsPunctuation, tsWithoutPunctuation);
  }
View Full Code Here

    args.put("alternate", "shifted");
    args.put("variableTop", " ");
    factory.init(args);
    factory.inform(new StringMockResourceLoader(""));
    TokenStream tsWithSpace = factory.create(
        new KeywordTokenizer(new StringReader(withSpace)));
    TokenStream tsWithoutSpace = factory.create(
        new KeywordTokenizer(new StringReader(withoutSpace)));
    assertCollatesToSame(tsWithSpace, tsWithoutSpace);
    // now assert that punctuation still matters: foo-bar < foo bar
    tsWithSpace = factory.create(
        new KeywordTokenizer(new StringReader(withSpace)));
    TokenStream tsWithPunctuation = factory.create(
        new KeywordTokenizer(new StringReader(withPunctuation)));
    assertCollation(tsWithPunctuation, tsWithSpace, -1);
  }
View Full Code Here

    args.put("locale", "en");
    args.put("numeric", "true");
    factory.init(args);
    factory.inform(new StringMockResourceLoader(""));
    TokenStream tsNine = factory.create(
        new KeywordTokenizer(new StringReader(nine)));
    TokenStream tsTen = factory.create(
        new KeywordTokenizer(new StringReader(ten)));
    assertCollation(tsNine, tsTen, -1);
  }
View Full Code Here

    args.put("strength", "primary");
    args.put("caseLevel", "true");
    factory.init(args);
    factory.inform(new StringMockResourceLoader(""));
    TokenStream tsWithAccents = factory.create(
        new KeywordTokenizer(new StringReader(withAccents)));
    TokenStream tsWithoutAccents = factory.create(
        new KeywordTokenizer(new StringReader(withoutAccents)));
    assertCollatesToSame(tsWithAccents, tsWithoutAccents);
   
    TokenStream tsWithAccentsUpperCase = factory.create(
        new KeywordTokenizer(new StringReader(withAccentsUpperCase)));
    TokenStream tsWithoutAccentsUpperCase = factory.create(
        new KeywordTokenizer(new StringReader(withoutAccentsUpperCase)));
    assertCollatesToSame(tsWithAccentsUpperCase, tsWithoutAccentsUpperCase);
   
    // now assert that case still matters: resume < Resume
    TokenStream tsLower = factory.create(
        new KeywordTokenizer(new StringReader(withoutAccents)));
    TokenStream tsUpper = factory.create(
        new KeywordTokenizer(new StringReader(withoutAccentsUpperCase)));
    assertCollation(tsLower, tsUpper, -1);
  }
View Full Code Here

    args.put("strength", "tertiary");
    args.put("caseFirst", "upper");
    factory.init(args);
    factory.inform(new StringMockResourceLoader(""));
    TokenStream tsLower = factory.create(
        new KeywordTokenizer(new StringReader(lower)));
    TokenStream tsUpper = factory.create(
        new KeywordTokenizer(new StringReader(upper)));
    assertCollation(tsUpper, tsLower, -1);
  }
View Full Code Here

    args.put("custom", "rules.txt");
    args.put("strength", "primary");
    factory.init(args);
    factory.inform(new StringMockResourceLoader(tailoredRules));
    TokenStream tsUmlaut = factory.create(
        new KeywordTokenizer(new StringReader(germanUmlaut)));
    TokenStream tsOE = factory.create(
        new KeywordTokenizer(new StringReader(germanOE)));

    assertCollatesToSame(tsUmlaut, tsOE);
  }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.core.KeywordTokenizer

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.