Examples of MockTokenizer


Examples of org.apache.lucene.analysis.MockTokenizer

  }


  public void testFloatEncoding() throws Exception {
    String test = "The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7";
    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false), '|', new FloatEncoder());
    CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
    PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
    filter.reset();
    assertTermEquals("The", filter, termAtt, payAtt, null);
    assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeFloat(1.0f));
View Full Code Here

Examples of org.apache.lucene.analysis.MockTokenizer

    filter.close();
  }

  public void testIntEncoding() throws Exception {
    String test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83";
    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false), '|', new IntegerEncoder());
    CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
    PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
    filter.reset();
    assertTermEquals("The", filter, termAtt, payAtt, null);
    assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeInt(1));
View Full Code Here

Examples of org.apache.lucene.analysis.MockTokenizer

  private TokenStream input;
 
  @Override
  public void setUp() throws Exception {
    super.setUp();
    input = new MockTokenizer(new StringReader("abcde"), MockTokenizer.WHITESPACE, false);
  }
View Full Code Here

Examples of org.apache.lucene.analysis.MockTokenizer

    NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 6, 7);
    assertTokenStreamContents(filter, new String[0], new int[0], new int[0]);
  }
 
  public void testSmallTokenInStream() throws Exception {
    input = new MockTokenizer(new StringReader("abc de fgh"), MockTokenizer.WHITESPACE, false);
    NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 3, 3);
    assertTokenStreamContents(filter, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10}, new int[] {1, 2});
  }
View Full Code Here

Examples of org.apache.lucene.analysis.MockTokenizer

  // so in this case we behave like WDF, and preserve any modified offsets
  public void testInvalidOffsets() throws Exception {
    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
        filters = new NGramTokenFilter(TEST_VERSION_CURRENT, filters, 2, 2);
        return new TokenStreamComponents(tokenizer, filters);
      }
    };
View Full Code Here

Examples of org.apache.lucene.analysis.MockTokenizer

      final int min = _TestUtil.nextInt(random(), 2, 10);
      final int max = _TestUtil.nextInt(random(), min, 20);
      Analyzer a = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
          Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
          return new TokenStreamComponents(tokenizer,
              new NGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, min, max));
        }   
      };
      checkRandomData(random(), a, 200*RANDOM_MULTIPLIER, 20);
View Full Code Here

Examples of org.apache.lucene.analysis.MockTokenizer

public class TypeAsPayloadTokenFilterTest extends BaseTokenStreamTestCase {

  public void test() throws IOException {
    String test = "The quick red fox jumped over the lazy brown dogs";

    TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false)));
    int count = 0;
    CharTermAttribute termAtt = nptf.getAttribute(CharTermAttribute.class);
    TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class);
    PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
    nptf.reset();
View Full Code Here

Examples of org.apache.lucene.analysis.MockTokenizer

   * Ensure the filter actually decomposes text.
   */
  public void testWordBreak() throws Exception {
    assumeTrue("JRE does not support Thai dictionary-based BreakIterator", ThaiWordFilter.DBBI_AVAILABLE);
    Reader reader = new StringReader("การที่ได้ต้องแสดงว่างานดี");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = tokenFilterFactory("ThaiWord").create(stream);
    assertTokenStreamContents(stream, new String[] {"การ", "ที่", "ได้",
        "ต้อง", "แสดง", "ว่า", "งาน", "ดี"});
  }
View Full Code Here

Examples of org.apache.lucene.analysis.MockTokenizer

    final SynonymMap map = b.build();

    final Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, false));
      }
    };

    assertAnalyzesTo(analyzer, "a b c",
View Full Code Here

Examples of org.apache.lucene.analysis.MockTokenizer

    final SynonymMap map = b.build();

    final Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, false));
      }
    };

    assertAnalyzesTo(analyzer, "a b c",
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.