Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.Token.termBuffer()


    if(!catMatch.equals("Unknown")){
      document = StringEscapeUtils.unescapeHtml(CLOSE_TEXT_TAG_PATTERN.matcher(OPEN_TEXT_TAG_PATTERN.matcher(document).replaceFirst("")).replaceAll(""));
      TokenStream stream = analyzer.tokenStream(catMatch, new StringReader(document));
      Token token = new Token();
      while((token = stream.next(token)) != null){
        contents.append(token.termBuffer(), 0, token.termLength()).append(' ');
      }
      output.collect(new Text(SPACE_NON_ALPHA_PATTERN.matcher(catMatch).replaceAll("_")), new Text(contents.toString()));
    }
  }
View Full Code Here


        Token t;
        StringBuilder sb = new StringBuilder();
        try {
            while ((t = psf.next()) != null) {
                sb.setLength(0);
                sb.append(t.termBuffer(), 0, t.termLength());
                //System.out.println(sb.toString());
                features.add(sb.toString());
            }
        } catch (IOException e) {
            // should never happen! We're reading a flippin' STRING!
View Full Code Here

          field.setOmitTermFreqAndPositions(omitTF);
        fields.add(field);
      } else {
        Token nextToken = tokenStream.next(new Token());
        while (nextToken != null) {
          String value = new String(nextToken.termBuffer(), 0,
              nextToken.termLength());
          field = new Field(fieldName, value, fieldStore, index);
          if (unique)
            field.setOmitTermFreqAndPositions(omitTF);
          fields.add(field);
View Full Code Here

                           new Token("b   ",6,10),
                           new Token("cCc",11,15),
                           new Token("   ",16,20)), false );

    Token token = ts.next();
    assertEquals("a", new String(token.termBuffer(), 0, token.termLength()));
    token = ts.next();
    assertEquals("b", new String(token.termBuffer(), 0, token.termLength()));
    token = ts.next();
    assertEquals("cCc", new String(token.termBuffer(), 0, token.termLength()));
    token = ts.next();
View Full Code Here

                           new Token("   ",16,20)), false );

    Token token = ts.next();
    assertEquals("a", new String(token.termBuffer(), 0, token.termLength()));
    token = ts.next();
    assertEquals("b", new String(token.termBuffer(), 0, token.termLength()));
    token = ts.next();
    assertEquals("cCc", new String(token.termBuffer(), 0, token.termLength()));
    token = ts.next();
    assertEquals("", new String(token.termBuffer(), 0, token.termLength()));
    token = ts.next();
View Full Code Here

    Token token = ts.next();
    assertEquals("a", new String(token.termBuffer(), 0, token.termLength()));
    token = ts.next();
    assertEquals("b", new String(token.termBuffer(), 0, token.termLength()));
    token = ts.next();
    assertEquals("cCc", new String(token.termBuffer(), 0, token.termLength()));
    token = ts.next();
    assertEquals("", new String(token.termBuffer(), 0, token.termLength()));
    token = ts.next();
    assertNull(token);
   
View Full Code Here

    token = ts.next();
    assertEquals("b", new String(token.termBuffer(), 0, token.termLength()));
    token = ts.next();
    assertEquals("cCc", new String(token.termBuffer(), 0, token.termLength()));
    token = ts.next();
    assertEquals("", new String(token.termBuffer(), 0, token.termLength()));
    token = ts.next();
    assertNull(token);
   
    ts = new TrimFilter( new IterTokenStream(
           new Token(" a", 0,2),
View Full Code Here

    PhoneticFilter filter = new PhoneticFilter(
        new IterTokenStream(stream.iterator()), enc, "text", inject );
   
    for( Token t : output ) {
      Token got = filter.next(t);
      assertEquals( new String(t.termBuffer(), 0, t.termLength()), new String(got.termBuffer(), 0, got.termLength()));
    }
    assertNull( filter.next() )// no more tokens
  }
 
  public void testEncodes() throws Exception {
View Full Code Here

    TokenStream ts = new PatternReplaceFilter
            (new WhitespaceTokenizer(new StringReader(input)),
                    Pattern.compile("a*b"),
                    "-", true);
    Token token = ts.next();
    assertEquals("-foo-foo-foo-", new String(token.termBuffer(), 0, token.termLength()));
    token = ts.next();
    assertEquals("-", new String(token.termBuffer(), 0, token.termLength()));
    token = ts.next();
    assertEquals("c-", new String(token.termBuffer(), 0, token.termLength()));
    token = ts.next();
View Full Code Here

                    Pattern.compile("a*b"),
                    "-", true);
    Token token = ts.next();
    assertEquals("-foo-foo-foo-", new String(token.termBuffer(), 0, token.termLength()));
    token = ts.next();
    assertEquals("-", new String(token.termBuffer(), 0, token.termLength()));
    token = ts.next();
    assertEquals("c-", new String(token.termBuffer(), 0, token.termLength()));
    token = ts.next();
    assertNull(token);
  }
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.