Package org.apache.lucene.analysis.core

Examples of org.apache.lucene.analysis.core.KeywordTokenizer


    String ten = "foobar-10";
    TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
        "locale", "en",
        "numeric", "true");
    TokenStream tsNine = factory.create(
        new KeywordTokenizer(new StringReader(nine)));
    TokenStream tsTen = factory.create(
        new KeywordTokenizer(new StringReader(ten)));
    assertCollation(tsNine, tsTen, -1);
  }
View Full Code Here


    TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
        "locale", "en",
        "strength", "primary",
        "caseLevel", "true");
    TokenStream tsWithAccents = factory.create(
        new KeywordTokenizer(new StringReader(withAccents)));
    TokenStream tsWithoutAccents = factory.create(
        new KeywordTokenizer(new StringReader(withoutAccents)));
    assertCollatesToSame(tsWithAccents, tsWithoutAccents);
   
    TokenStream tsWithAccentsUpperCase = factory.create(
        new KeywordTokenizer(new StringReader(withAccentsUpperCase)));
    TokenStream tsWithoutAccentsUpperCase = factory.create(
        new KeywordTokenizer(new StringReader(withoutAccentsUpperCase)));
    assertCollatesToSame(tsWithAccentsUpperCase, tsWithoutAccentsUpperCase);
   
    // now assert that case still matters: resume < Resume
    TokenStream tsLower = factory.create(
        new KeywordTokenizer(new StringReader(withoutAccents)));
    TokenStream tsUpper = factory.create(
        new KeywordTokenizer(new StringReader(withoutAccentsUpperCase)));
    assertCollation(tsLower, tsUpper, -1);
  }
View Full Code Here

    TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
        "locale", "en",
        "strength", "tertiary",
        "caseFirst", "upper");
    TokenStream tsLower = factory.create(
        new KeywordTokenizer(new StringReader(lower)));
    TokenStream tsUpper = factory.create(
        new KeywordTokenizer(new StringReader(upper)));
    assertCollation(tsUpper, tsLower, -1);
  }
View Full Code Here

    args.put("custom", "rules.txt");
    args.put("strength", "primary");
    ICUCollationKeyFilterFactory factory = new ICUCollationKeyFilterFactory(args);
    factory.inform(new StringMockResourceLoader(tailoredRules));
    TokenStream tsUmlaut = factory.create(
        new KeywordTokenizer(new StringReader(germanUmlaut)));
    TokenStream tsOE = factory.create(
        new KeywordTokenizer(new StringReader(germanOE)));

    assertCollatesToSame(tsUmlaut, tsOE);
  }
View Full Code Here

      final boolean ignoreCase = random.nextBoolean();
     
      final Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
          Tokenizer tokenizer = new KeywordTokenizer(reader);
          return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
        }
      };

      checkAnalysisConsistency(random, analyzer, random.nextBoolean(), "");
View Full Code Here

  public void testEmptyTerm() throws Exception {
    Random random = random();
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer,
            new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, EdgeNGramTokenFilter.Side.FRONT, 2, 15));
      }   
    };
    checkAnalysisConsistency(random, a, random.nextBoolean(), "");
   
    Analyzer b = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer,
            new EdgeNGramTokenFilter(Version.LUCENE_43, tokenizer, EdgeNGramTokenFilter.Side.BACK, 2, 15));
      }   
    };
    checkAnalysisConsistency(random, b, random.nextBoolean(), "");
View Full Code Here

  public void testSupplementaryCharacters() throws IOException {
    final String s = _TestUtil.randomUnicodeString(random(), 10);
    final int codePointCount = s.codePointCount(0, s.length());
    final int minGram = _TestUtil.nextInt(random(), 1, 3);
    final int maxGram = _TestUtil.nextInt(random(), minGram, 10);
    TokenStream tk = new KeywordTokenizer(new StringReader(s));
    tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, minGram, maxGram);
    final CharTermAttribute termAtt = tk.addAttribute(CharTermAttribute.class);
    final OffsetAttribute offsetAtt = tk.addAttribute(OffsetAttribute.class);
    tk.reset();
    for (int i = minGram; i <= Math.min(codePointCount, maxGram); ++i) {
      assertTrue(tk.incrementToken());
      assertEquals(0, offsetAtt.startOffset());
      assertEquals(s.length(), offsetAtt.endOffset());
      final int end = Character.offsetByCodePoints(s, 0, i);
      assertEquals(s.substring(0, end), termAtt.toString());
    }
    assertFalse(tk.incrementToken());
  }
View Full Code Here

 
  public void testEmptyTerm() throws IOException {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new CodepointCountFilter(TEST_VERSION_CURRENT, tokenizer, 0, 5));
      }
    };
    checkOneTerm(a, "", "");
  }
View Full Code Here

      String text = _TestUtil.randomUnicodeString(random(), 100);
      int min = _TestUtil.nextInt(random(), 0, 100);
      int max = _TestUtil.nextInt(random(), 0, 100);
      int count = text.codePointCount(0, text.length());
      boolean expected = count >= min && count <= max;
      TokenStream stream = new KeywordTokenizer(new StringReader(text));
      stream = new CodepointCountFilter(TEST_VERSION_CURRENT, stream, min, max);
      stream.reset();
      assertEquals(expected, stream.incrementToken());
      stream.end();
      stream.close();
    }
  }
View Full Code Here

 
  public void testEmptyTerm() throws IOException {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new ASCIIFoldingFilter(tokenizer));
      }
    };
    checkOneTerm(a, "", "");
  }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.core.KeywordTokenizer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.