/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeSource; // javadoc @link
import java.io.IOException;
import java.util.LinkedList;
/**
* Handles input and output buffering of TokenStream
*
* <pre>
* // Example of a class implementing the rule "A" "B" => "Q" "B"
* class MyTokenStream extends BufferedTokenStream {
* public MyTokenStream(TokenStream input) {super(input);}
* protected Token process(Token t) throws IOException {
* if ("A".equals(t.termText())) {
* Token t2 = read();
* if (t2!=null && "B".equals(t2.termText())) t.setTermText("Q");
* if (t2!=null) pushBack(t2);
* }
* return t;
* }
* }
*
* // Example of a class implementing "A" "B" => "A" "A" "B"
* class MyTokenStream extends BufferedTokenStream {
* public MyTokenStream(TokenStream input) {super(input);}
* protected Token process(Token t) throws IOException {
* if ("A".equals(t.termText()) && "B".equals(peek(1).termText()))
* write((Token)t.clone());
* return t;
* }
* }
* </pre>
*
* NOTE: BufferedTokenStream does not clone() any Tokens. This is instead the
* responsibility of the implementing subclass. In the "A" "B" => "A" "A" "B"
* example above, the subclass must clone the additional "A" it creates.
*
* @deprecated This class does not support custom attributes. Extend TokenFilter instead,
* using {@link AttributeSource#captureState()} and {@link AttributeSource#restoreState(State)}
* which support all attributes.
*/
@Deprecated
public abstract class BufferedTokenStream extends TokenFilter {
// in the future, might be faster if we implemented as an array based CircularQueue
private final LinkedList<Token> inQueue = new LinkedList<Token>();
private final LinkedList<Token> outQueue = new LinkedList<Token>();
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
public BufferedTokenStream(TokenStream input) {
super(input);
}
/**
* Process a token. Subclasses may read more tokens from the input stream,
* write more tokens to the output stream, or simply return the next token
* to be output. Subclasses may return null if the token is to be dropped.
* If a subclass writes tokens to the output stream and returns a
* non-null Token, the returned Token is considered to be at the head of
* the token output stream.
*/
protected abstract Token process(Token t) throws IOException;
@Override
public final boolean incrementToken() throws IOException {
while (true) {
if (!outQueue.isEmpty()) return writeToken(outQueue.removeFirst());
Token t = read();
if (null == t) return false;
Token out = process(t);
if (null != out) return writeToken(out);
// loop back to top in case process() put something on the output queue
}
}
/**
* Read a token from the buffered input stream.
* @return null at EOS
*/
protected Token read() throws IOException {
if (inQueue.isEmpty()) {
Token t = readToken();
return t;
}
return inQueue.removeFirst();
}
/**
* Push a token back into the buffered input stream, such that it will
* be returned by a future call to <code>read()</code>
*/
protected void pushBack(Token t) {
inQueue.addFirst(t);
}
/**
* Peek n tokens ahead in the buffered input stream, without modifying
* the stream.
* @param n Number of tokens into the input stream to peek, 1 based ...
* 0 is invalid
* @return a Token which exists in the input stream, any modifications
* made to this Token will be "real" if/when the Token is
* <code>read()</code> from the stream.
*/
protected Token peek(int n) throws IOException {
int fillCount = n-inQueue.size();
for (int i=0; i < fillCount; i++) {
Token t = readToken();
if (null==t) return null;
inQueue.addLast(t);
}
return inQueue.get(n-1);
}
/** old api emulation for back compat */
private Token readToken() throws IOException {
if (!input.incrementToken()) {
return null;
} else {
Token token = new Token();
token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
token.setType(typeAtt.type());
token.setFlags(flagsAtt.getFlags());
token.setPositionIncrement(posIncAtt.getPositionIncrement());
token.setPayload(payloadAtt.getPayload());
return token;
}
}
/** old api emulation for back compat */
private boolean writeToken(Token token) throws IOException {
clearAttributes();
termAtt.copyBuffer(token.buffer(), 0, token.length());
offsetAtt.setOffset(token.startOffset(), token.endOffset());
typeAtt.setType(token.type());
flagsAtt.setFlags(token.getFlags());
posIncAtt.setPositionIncrement(token.getPositionIncrement());
payloadAtt.setPayload(token.getPayload());
return true;
}
/**
* Write a token to the buffered output stream
*/
protected void write(Token t) {
outQueue.addLast(t);
}
/**
* Provides direct Iterator access to the buffered output stream.
* Modifying any token in this Iterator will affect the resulting stream.
*/
protected Iterable<Token> output() {
return outQueue;
}
@Override
public void reset() throws IOException {
super.reset();
inQueue.clear();
outQueue.clear();
}
}