Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.Token.termLength()


    Token t = input.next(in);
    if (null == t || null == t.termBuffer() || t.termLength() == 0){
      return t;
    }
    char[] termBuffer = t.termBuffer();
    int len = t.termLength();
    int start = 0;
    int end = 0;
    int endOff = 0;

    // eat the first characters
View Full Code Here


  @Override
  public Token next(Token token) throws IOException {
    Token result = input.next(token);
    if (result != null) {
      char[] termBuffer = result.termBuffer();
      int len = result.termLength();
      // if protected, don't stem.  use this to avoid stemming collisions.
      if (protWords != null && protWords.contains(termBuffer, 0, len)) {
        return result;
      }
      stemmer.setCurrent(new String(termBuffer, 0, len));//ugh, wish the Stemmer took a char array
View Full Code Here

          best.put(inputToken, theSuggestions.keySet().iterator().next());
        }
        if (hasFreqInfo) {
          isCorrectlySpelled = isCorrectlySpelled && spellingResult.getTokenFrequency(inputToken) > 0;
        }
        result.add(new String(inputToken.termBuffer(), 0, inputToken.termLength()), suggestionList);
      }
    }
    if (hasFreqInfo) {
      result.add("correctlySpelled", isCorrectlySpelled);
    }
View Full Code Here

 
  public final Token next(Token in) throws IOException {
    Token t = input.next(in);
    if (t == null)
      return null;
    CharSequence text = CharBuffer.wrap(t.termBuffer(), 0, t.termLength());
    Matcher m = p.matcher(text);
    if (all) {
      t.setTermText(m.replaceAll(replacement));
    } else {
      t.setTermText(m.replaceFirst(replacement));
View Full Code Here

        write(t);
        t = null;
      }
      boolean dup=false;
      for (Token outTok : output()) {
        int tokLen = tok.termLength();
        if (outTok.termLength() == tokLen && ArraysUtils.equals(outTok.termBuffer(), 0, tok.termBuffer(), 0, tokLen)) {
          dup=true;
          //continue;;
        }
      }
View Full Code Here

      return temp;
    }
   
    Token t = input.next(in);
    if( t != null ) {
      String value = new String(t.termBuffer(), 0, t.termLength());
      try {
        value = encoder.encode(value).toString();
      }
      catch (Exception ignored) {} // just use the direct text
      //Token m = new Token(value, t.startOffset(), t.endOffset(), name );
View Full Code Here

    NamedList<NamedList<Object>> tokens = new NamedList<NamedList<Object>>();
    Token t = null;
    while (((t = tstream.next()) != null)) {
      NamedList<Object> token = new SimpleOrderedMap<Object>();
      tokens.add("token", token);
      token.add("value", new String(t.termBuffer(), 0, t.termLength()));
      token.add("start", t.startOffset());
      token.add("end", t.endOffset());
      token.add("posInc", t.getPositionIncrement());
      token.add("type", t.type());
      //TODO: handle payloads
View Full Code Here

      // be safe to use the next(Token) method.
      t = input.next(in);
      if (t == null) return null;

      char [] termBuffer = t.termBuffer();
      int len = t.termLength();
      int start=0;
      if (len ==0) continue;

      origPosIncrement += t.getPositionIncrement();
View Full Code Here

    Token tok=null;
    for (int i=start; i<end; i++) {
      tok = lst.get(i);
      if (catenateSubwords) {
        if (i==start) firstTok=tok;
        sb.append(tok.termBuffer(), 0, tok.termLength());
      }
      if (generateSubwords) {
        queue.add(tok);
      }
    }
View Full Code Here

  public Token next(Token token) throws IOException {
    Token t = input.next(token);
    if (t != null) {

      char[] termBuffer = t.termBuffer();
      int termBufferLength = t.termLength();
      char[] backup = null;
      if (factory.maxWordCount < CapitalizationFilterFactory.DEFAULT_MAX_WORD_COUNT) {
        //make a backup in case we exceed the word count
        System.arraycopy(termBuffer, 0, backup, 0, termBufferLength);
      }
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by ORACLE Inc. Contact coftware#gmail.com.