Package: org.apache.lucene.analysis

Usage examples of org.apache.lucene.analysis.Tokenizer


      this.maxChars=maxChars;
    }

    @Override
    public TokenStreamInfo getStream(String fieldName, Reader reader) {
      Tokenizer ts = new Tokenizer(reader) {
        final char[] cbuf = new char[maxChars];
        final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
        final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
        @Override
        public boolean incrementToken() throws IOException {
View Full Code Here


    return reader;
  }

  @Override
  public TokenStreamInfo getStream(String fieldName, Reader reader) {
    Tokenizer tk = tokenizer.create(charStream(reader));
    TokenStream ts = tk;
    for (int i=0; i<filters.length; i++) {
      ts = filters[i].create(ts);
    }
    return new TokenStreamInfo(tk,ts);
View Full Code Here

    // Resolve the configured tokenizer factory, if any; null means "use the default tokenizer".
    final TokenizerFactory factory = tf == null ? null : loadTokenizerFactory(loader, tf, args);
   
    // Anonymous analyzer wrapping the factory-built (or default whitespace) tokenizer,
    // optionally lower-casing the stream when ignoreCase is set.
    Analyzer analyzer = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        // No factory configured -> fall back to simple whitespace tokenization.
        Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_31, reader) : factory.create(reader);
        // NOTE(review): lower-casing is applied only when ignoreCase is true — presumably for
        // case-insensitive matching downstream; confirm against the caller of this snippet.
        TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_31, tokenizer) : tokenizer;
        return new TokenStreamComponents(tokenizer, stream);
      }
    };
View Full Code Here

    0, 1, 0, 1, 0
  };

  @Test public void testNameFilter() throws IOException {
    Reader in = new StringReader(input);
    Tokenizer tok = new SentenceTokenizer(in, detector);
    NameFilter nf = new NameFilter(tok, modelName, finder);

    CharTermAttribute cta;
    PositionIncrementAttribute pta;
    OffsetAttribute oa;
   
    int pass = 0;
   
    while (pass < 2) { // test reuse.
      int pos = 0;
      int lastStart = 0;
      int lastEnd   = 0;
     
      while (nf.incrementToken()) {
        cta = (CharTermAttribute) nf.getAttribute(CharTermAttribute.class);
        pta = (PositionIncrementAttribute) nf.getAttribute(PositionIncrementAttribute.class);
        oa  = (OffsetAttribute) nf.getAttribute(OffsetAttribute.class);
       
        System.err.println("'" + cta.toString() + "'");
        System.err.println(pta.toString());
        System.err.println(oa.toString());
        System.err.println("--- pass: " + pass);
       
        TestCase.assertEquals(tokenStrings[pos], cta.toString());
        TestCase.assertEquals(positionIncrements[pos], pta.getPositionIncrement());
       
        if (pta.getPositionIncrement() == 0) {
          TestCase.assertEquals(lastStart, oa.startOffset());
          TestCase.assertEquals(lastEnd, oa.endOffset());
        }
       
        if (!cta.toString().startsWith("NE_")) {
          TestCase.assertEquals(input.substring(oa.startOffset(), oa.endOffset()), cta.toString());
        }
       
        lastStart = oa.startOffset();
        lastEnd   = oa.endOffset();
       
        pos++;
      }
     
      //if (pass == 1) nf.dumpState();
      nf.end();
     
      in.close();
      in = new StringReader(input);
      tok.reset(in);
      pass++;
    }
  }
View Full Code Here

  }

  @Override
  protected TokenStreamComponents createComponents(final String fieldName,
                                                   final Reader reader) {
    final Tokenizer sink = new NumericTokenizer(reader, new IntNumericParser(), precisionStep);
    return new TokenStreamComponents(sink);
  }
View Full Code Here

  }

  @Override
  protected TokenStreamComponents createComponents(final String fieldName,
                                                   final Reader reader) {
    final Tokenizer sink = new NumericTokenizer(reader, new DoubleNumericParser(), precisionStep);
    return new TokenStreamComponents(sink);
  }
View Full Code Here

  }

  @Override
  protected TokenStreamComponents createComponents(final String fieldName,
                                                   final Reader reader) {
    final Tokenizer sink = new NumericTokenizer(reader, new LongNumericParser(), precisionStep);
    return new TokenStreamComponents(sink);
  }
View Full Code Here

  }

  @Override
  protected TokenStreamComponents createComponents(final String fieldName,
                                                   final Reader reader) {
    final Tokenizer sink = new NumericTokenizer(reader, new FloatNumericParser(), precisionStep);
    return new TokenStreamComponents(sink);
  }
View Full Code Here

  }

  @Override
  protected TokenStreamComponents createComponents(final String fieldName,
                                                   final Reader reader) {
    final Tokenizer sink = new NumericTokenizer(reader, new DateNumericParser(), precisionStep);
    return new TokenStreamComponents(sink);
  }
View Full Code Here

  public void testSimpleJsonTokenizer() throws Exception {
    final Reader reader = new StringReader("{ \"aaa\" : { \"bbb\" : \"ooo\" } }");
    final Map<String,String> args = this.getDefaultInitArgs();
    final JsonTokenizerFactory factory = new JsonTokenizerFactory();
    factory.init(args);
    final Tokenizer stream = factory.create(reader);
    this.assertTokenStreamContents(stream,
        new String[] {"aaa", "bbb", "ooo"});
  }
View Full Code Here

TOP

Related classes of org.apache.lucene.analysis.Tokenizer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Inc. Contact: coftware#gmail.com.