Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.WhitespaceAnalyzer.tokenStream()
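Note: the snippets below all date from the Lucene 2.x line, where a TokenStream was drained by calling next() until it returned null. For comparison, here is a minimal sketch of the same loop against the newer attribute-based API — an assumption of a recent Lucene (5.x or later), where WhitespaceAnalyzer lives in org.apache.lucene.analysis.core and tokenStream() accepts the text as a plain String:

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class WhitespaceTokenStreamDemo {
  public static void main(String[] args) throws IOException {
    Analyzer analyzer = new WhitespaceAnalyzer();
    // The String overload of tokenStream() wraps the text in a Reader internally.
    try (TokenStream ts = analyzer.tokenStream("field", "one two three")) {
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      ts.reset();                 // required before the first incrementToken()
      while (ts.incrementToken()) {
        System.out.println(termAtt.toString());
      }
      ts.end();                   // records end-of-stream offset state
    }
  }
}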


class SimpleQueryConverter extends SpellingQueryConverter {
  @Override
  public Collection<Token> convert(String origQuery) {
    Collection<Token> result = new HashSet<Token>();
    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
    TokenStream ts = analyzer.tokenStream("", new StringReader(origQuery));
    Token tok = null;
    try {
      // Drain the stream with the pre-2.9 Token-returning API.
      while ((tok = ts.next()) != null) {
        result.add(tok);
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
    return result;
  }
}


    // analyzer to introduce stopwords and increment gaps
    Analyzer stpa = new Analyzer() {
      final WhitespaceAnalyzer a = new WhitespaceAnalyzer();
      public TokenStream tokenStream(String fieldName, Reader reader) {
        TokenStream ts = a.tokenStream(fieldName, reader);
        return new StopFilter(ts, new String[]{"stop"});
      }
    };

    // should not find "1 2" because there is a gap of 1 in the index
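The anonymous subclass above overrides the old tokenStream(String, Reader) extension point and uses the String[]-based StopFilter constructor, both long gone. A sketch of the same stopword analyzer against the newer Analyzer API — assuming Lucene 5.x or later, where createComponents() takes only the field name and WhitespaceTokenizer has a no-arg constructor:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;

    // analyzer to introduce stopwords (modern form); drop-in for the assignment above
    Analyzer stpa = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer source = new WhitespaceTokenizer();
        TokenStream filtered = new StopFilter(source, StopFilter.makeStopSet("stop"));
        return new TokenStreamComponents(source, filtered);
      }
    };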

  /**
   * Basic analyzer behavior should be to keep sequential terms in one
   * increment from one another.
   */
  public void testIncrementingPositions() throws Exception {
    Analyzer analyzer = new WhitespaceAnalyzer();
    TokenStream ts = analyzer.tokenStream("field",
                                new StringReader("one two three four five"));

    while (true) {
      Token token = ts.next();
      if (token == null) break;
      assertEquals(token.term(), 1, token.getPositionIncrement());
    }
  }

  /**
   * Basic analyzer behavior should be to keep sequential terms in one
   * increment from one another.
   */
  public void testIncrementingPositions() throws Exception {
    Analyzer analyzer = new WhitespaceAnalyzer();
    TokenStream ts = analyzer.tokenStream("field",
                                new StringReader("one two three four five"));
    // Reuse one Token instance across calls (the Lucene 2.4-style reuse API).
    final Token reusableToken = new Token();
    for (Token nextToken = ts.next(reusableToken); nextToken != null; nextToken = ts.next(reusableToken)) {
      assertEquals(nextToken.term(), 1, nextToken.getPositionIncrement());
    }
  }
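Both variants of this test poll the stream with next(), which later Lucene releases removed. A rough equivalent of the assertion loop under the attribute API — assuming Lucene 3.1+, where CharTermAttribute and PositionIncrementAttribute are available in org.apache.lucene.analysis.tokenattributes:

    TokenStream ts = analyzer.tokenStream("field",
                                new StringReader("one two three four five"));
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      // Adjacent whitespace-separated terms should each advance the position by 1.
      assertEquals(termAtt.toString(), 1, posIncrAtt.getPositionIncrement());
    }
    ts.end();
    ts.close();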


  public void testEndOffsetPositionWithCachingTokenFilter() throws Exception {
    MockRAMDirectory dir = new MockRAMDirectory();
    Analyzer analyzer = new WhitespaceAnalyzer();
    IndexWriter w = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    // Cache the stream so it can be replayed when the field is added twice.
    TokenStream stream = new CachingTokenFilter(analyzer.tokenStream("field", new StringReader("abcd   ")));
    Field f = new Field("field", stream, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f);
    doc.add(f);
    w.addDocument(doc);
    w.close();
    // Remainder of the test (reading the index back to check offsets) omitted in the source listing.
  }
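The CachingTokenFilter here matters because the same Field instance is added to the document twice: a raw TokenStream can only be consumed once, so the filter records the token states on the first consumption and replays them from its cache for the second. Both copies of the field therefore index identical terms, positions, and offsets.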

  public void testEndOffsetPositionWithTeeSinkTokenFilter() throws Exception {
    MockRAMDirectory dir = new MockRAMDirectory();
    Analyzer analyzer = new WhitespaceAnalyzer();
    IndexWriter w = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    // Tee the stream: the tee feeds one field while the sink replays the same tokens for the other.
    TeeSinkTokenFilter tee = new TeeSinkTokenFilter(analyzer.tokenStream("field", new StringReader("abcd   ")));
    TokenStream sink = tee.newSinkTokenStream();
    Field f1 = new Field("field", tee, Field.TermVector.WITH_POSITIONS_OFFSETS);
    Field f2 = new Field("field", sink, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f1);
    doc.add(f2);
    w.addDocument(doc);
    w.close();
    // Remainder of the test (verifying the recorded offsets) omitted in the source listing.
  }
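TeeSinkTokenFilter addresses the same one-pass constraint differently: the tee is indexed as the first field's stream, and each token state it passes through is also captured for the attached sink, which the second field then replays. Analysis runs only once, yet both fields see the same tokens — which is what the end-offset checks in this test depend on.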

