Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.ReusableAnalyzerBase$TokenStreamComponents


    };
    checkRandomData(random, a, 1000*RANDOM_MULTIPLIER);
  }
 
  public void testEmptyTerm() throws IOException {
    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new ICUTransformFilter(tokenizer, Transliterator.getInstance("Any-Latin")));
      }
View Full Code Here


  public void testRandomData() throws IOException {
    checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
  }
 
  public void testEmptyTerm() throws IOException {
    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new JapaneseKatakanaStemFilter(tokenizer));
      }
View Full Code Here

                     surfaceForms);
  }

  public void testLatticeToDot() throws Exception {
    final GraphvizFormatter gv2 = new GraphvizFormatter(ConnectionCosts.getInstance());
    final Analyzer analyzer = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        JapaneseTokenizer tokenizer = new JapaneseTokenizer(reader, readDict(), false, Mode.SEARCH);
        tokenizer.setGraphvizFormatter(gv2);
        return new TokenStreamComponents(tokenizer, tokenizer);
View Full Code Here

    checkRandomData(random, katakanaAnalyzer, 1000*RANDOM_MULTIPLIER);
    checkRandomData(random, romajiAnalyzer, 1000*RANDOM_MULTIPLIER);
  }
 
  public void testEmptyTerm() throws IOException {
    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(tokenizer));
      }
View Full Code Here

    super(host, "lucene_server");

    this.allowedThreads = allowedThreads;
    this.initialThreads = Thread.activeCount() + 3;

    this.analyzer = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName,
          Reader reader) {
        WhitespaceTokenizer tokenStream = new WhitespaceTokenizer(
            Version.LUCENE_36, reader);
View Full Code Here

  // LUCENE-3642
  // EdgeNgram blindly adds term length to offset, but this can take things out of bounds
  // wrt original text if a previous filter increases the length of the word (in this case æ -> ae)
  // so in this case we behave like WDF, and preserve any modified offsets
  public void testInvalidOffsets() throws Exception {
    Analyzer analyzer = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
        filters = new EdgeNGramTokenFilter(filters, EdgeNGramTokenFilter.Side.FRONT, 2, 15);
View Full Code Here

        new int[]    {   11,    11,     11,      11,       11,        11,         11,          11,           11,            11,             11 });
  }
 
  /** blast some random strings through the analyzer */
  public void testRandomStrings() throws Exception {
    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer,
            new EdgeNGramTokenFilter(tokenizer, EdgeNGramTokenFilter.Side.FRONT, 2, 15));
      }   
    };
    checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
   
    Analyzer b = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer,
            new EdgeNGramTokenFilter(tokenizer, EdgeNGramTokenFilter.Side.BACK, 2, 15));
View Full Code Here

    };
    checkRandomData(random, b, 10000*RANDOM_MULTIPLIER, 20, false, false);
  }
 
  public void testEmptyTerm() throws Exception {
    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer,
            new EdgeNGramTokenFilter(tokenizer, EdgeNGramTokenFilter.Side.FRONT, 2, 15));
      }   
    };
    checkAnalysisConsistency(random, a, random.nextBoolean(), "");
   
    Analyzer b = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer,
            new EdgeNGramTokenFilter(tokenizer, EdgeNGramTokenFilter.Side.BACK, 2, 15));
View Full Code Here

        add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
      }
      final SynonymMap map = b.build();
      final boolean ignoreCase = random.nextBoolean();
     
      final Analyzer analyzer = new ReusableAnalyzerBase() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
          Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
          return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
        }
View Full Code Here

        add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
      }
      final SynonymMap map = b.build();
      final boolean ignoreCase = random.nextBoolean();
     
      final Analyzer analyzer = new ReusableAnalyzerBase() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
          Tokenizer tokenizer = new KeywordTokenizer(reader);
          return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
        }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.ReusableAnalyzerBase$TokenStreamComponents

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.