Package org.apache.lucene.queryParser.standard

Source Code of org.apache.lucene.queryParser.standard.TestMultiAnalyzerWrapper$TestPosIncrementFilter

package org.apache.lucene.queryParser.standard;

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.standard.QueryParserWrapper;
import org.apache.lucene.util.LuceneTestCase;

/**
* This test case is a copy of the core Lucene query parser test, it was adapted
* to use new QueryParserWrapper instead of the old query parser.
*
* Test QueryParser's ability to deal with Analyzers that return more than one
* token per position or that return tokens with a position increment > 1.
*
*/
public class TestMultiAnalyzerWrapper extends LuceneTestCase {

  private static int multiToken = 0;

  public void testMultiAnalyzer() throws ParseException {

    QueryParserWrapper qp = new QueryParserWrapper("", new MultiAnalyzer());

    // trivial, no multiple tokens:
    assertEquals("foo", qp.parse("foo").toString());
    assertEquals("foo", qp.parse("\"foo\"").toString());
    assertEquals("foo foobar", qp.parse("foo foobar").toString());
    assertEquals("\"foo foobar\"", qp.parse("\"foo foobar\"").toString());
    assertEquals("\"foo foobar blah\"", qp.parse("\"foo foobar blah\"")
        .toString());

    // two tokens at the same position:
    assertEquals("(multi multi2) foo", qp.parse("multi foo").toString());
    assertEquals("foo (multi multi2)", qp.parse("foo multi").toString());
    assertEquals("(multi multi2) (multi multi2)", qp.parse("multi multi")
        .toString());
    assertEquals("+(foo (multi multi2)) +(bar (multi multi2))", qp.parse(
        "+(foo multi) +(bar multi)").toString());
    assertEquals("+(foo (multi multi2)) field:\"bar (multi multi2)\"", qp
        .parse("+(foo multi) field:\"bar multi\"").toString());

    // phrases:
    assertEquals("\"(multi multi2) foo\"", qp.parse("\"multi foo\"").toString());
    assertEquals("\"foo (multi multi2)\"", qp.parse("\"foo multi\"").toString());
    assertEquals("\"foo (multi multi2) foobar (multi multi2)\"", qp.parse(
        "\"foo multi foobar multi\"").toString());

    // fields:
    assertEquals("(field:multi field:multi2) field:foo", qp.parse(
        "field:multi field:foo").toString());
    assertEquals("field:\"(multi multi2) foo\"", qp
        .parse("field:\"multi foo\"").toString());

    // three tokens at one position:
    assertEquals("triplemulti multi3 multi2", qp.parse("triplemulti")
        .toString());
    assertEquals("foo (triplemulti multi3 multi2) foobar", qp.parse(
        "foo triplemulti foobar").toString());

    // phrase with non-default slop:
    assertEquals("\"(multi multi2) foo\"~10", qp.parse("\"multi foo\"~10")
        .toString());

    // phrase with non-default boost:
    assertEquals("\"(multi multi2) foo\"^2.0", qp.parse("\"multi foo\"^2")
        .toString());

    // phrase after changing default slop
    qp.setPhraseSlop(99);
    assertEquals("\"(multi multi2) foo\"~99 bar", qp.parse("\"multi foo\" bar")
        .toString());
    assertEquals("\"(multi multi2) foo\"~99 \"foo bar\"~2", qp.parse(
        "\"multi foo\" \"foo bar\"~2").toString());
    qp.setPhraseSlop(0);

    // non-default operator:
    qp.setDefaultOperator(QueryParserWrapper.AND_OPERATOR);
    assertEquals("+(multi multi2) +foo", qp.parse("multi foo").toString());

  }

  // public void testMultiAnalyzerWithSubclassOfQueryParser() throws
  // ParseException {
  // this test doesn't make sense when using the new QueryParser API
  // DumbQueryParser qp = new DumbQueryParser("", new MultiAnalyzer());
  // qp.setPhraseSlop(99); // modified default slop
  //
  // // direct call to (super's) getFieldQuery to demonstrate differnce
  // // between phrase and multiphrase with modified default slop
  // assertEquals("\"foo bar\"~99",
  // qp.getSuperFieldQuery("","foo bar").toString());
  // assertEquals("\"(multi multi2) bar\"~99",
  // qp.getSuperFieldQuery("","multi bar").toString());
  //
  //   
  // // ask sublcass to parse phrase with modified default slop
  // assertEquals("\"(multi multi2) foo\"~99 bar",
  // qp.parse("\"multi foo\" bar").toString());
  //   
  // }

  public void testPosIncrementAnalyzer() throws ParseException {
    QueryParserWrapper qp = new QueryParserWrapper("",
        new PosIncrementAnalyzer());
    assertEquals("quick brown", qp.parse("the quick brown").toString());
    assertEquals("\"quick brown\"", qp.parse("\"the quick brown\"").toString());
    assertEquals("quick brown fox", qp.parse("the quick brown fox").toString());
    assertEquals("\"quick brown fox\"", qp.parse("\"the quick brown fox\"")
        .toString());
  }

  /**
   * Expands "multi" to "multi" and "multi2", both at the same position, and
   * expands "triplemulti" to "triplemulti", "multi3", and "multi2".
   */
  private class MultiAnalyzer extends Analyzer {

    public MultiAnalyzer() {
    }

    public TokenStream tokenStream(String fieldName, Reader reader) {
      TokenStream result = new StandardTokenizer(reader);
      result = new TestFilter(result);
      result = new LowerCaseFilter(result);
      return result;
    }
  }

  private final class TestFilter extends TokenFilter {

    private String prevType;
    private int prevStartOffset;
    private int prevEndOffset;

    TermAttribute termAtt;
    PositionIncrementAttribute posIncrAtt;
    OffsetAttribute offsetAtt;
    TypeAttribute typeAtt;

    public TestFilter(TokenStream in) {
      super(in);
      termAtt = (TermAttribute) addAttribute(TermAttribute.class);
      posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
      offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
      typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);

    }

    /*
     * (non-Javadoc)
     *
     * @see
     * org.apache.lucene.analysis.TokenStream#next(org.apache.lucene.analysis
     * .Token)
     */
    @Override
    public Token next(Token reusableToken) throws IOException {

      if (multiToken > 0) {
        reusableToken.setTermBuffer("multi" + (multiToken + 1));
        reusableToken.setStartOffset(prevStartOffset);
        reusableToken.setEndOffset(prevEndOffset);
        reusableToken.setType(prevType);
        reusableToken.setPositionIncrement(0);
        multiToken--;
        return reusableToken;
      } else {
        boolean next = (reusableToken = input.next(token)) != null;
        if (next == false) {
          return null;
        }
        prevType = reusableToken.type();
        prevStartOffset = reusableToken.startOffset();
        prevEndOffset = reusableToken.endOffset();
        String text = reusableToken.term();
        if (text.equals("triplemulti")) {
          multiToken = 2;
          return reusableToken;
        } else if (text.equals("multi")) {
          multiToken = 1;
          return reusableToken;
        } else {
          return reusableToken;
        }
      }

    }

    private Token token = new Token();

    public final boolean incrementToken() throws java.io.IOException {
      if (multiToken > 0) {
        termAtt.setTermBuffer("multi" + (multiToken + 1));
        offsetAtt.setOffset(prevStartOffset, prevEndOffset);
        typeAtt.setType(prevType);
        posIncrAtt.setPositionIncrement(0);
        multiToken--;
        return true;
      } else {
        boolean next = input.incrementToken();
        if (next == false) {
          return false;
        }
        prevType = typeAtt.type();
        prevStartOffset = offsetAtt.startOffset();
        prevEndOffset = offsetAtt.endOffset();
        String text = termAtt.term();
        if (text.equals("triplemulti")) {
          multiToken = 2;
          return true;
        } else if (text.equals("multi")) {
          multiToken = 1;
          return true;
        } else {
          return true;
        }
      }
    }

  }

  /**
   * Analyzes "the quick brown" as: quick(incr=2) brown(incr=1). Does not work
   * correctly for input other than "the quick brown ...".
   */
  private class PosIncrementAnalyzer extends Analyzer {

    public PosIncrementAnalyzer() {
    }

    public TokenStream tokenStream(String fieldName, Reader reader) {
      TokenStream result = new StandardTokenizer(reader);
      result = new TestPosIncrementFilter(result);
      result = new LowerCaseFilter(result);
      return result;
    }
  }

  private class TestPosIncrementFilter extends TokenFilter {

    TermAttribute termAtt;
    PositionIncrementAttribute posIncrAtt;

    public TestPosIncrementFilter(TokenStream in) {
      super(in);
      termAtt = (TermAttribute) addAttribute(TermAttribute.class);
      posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
    }

    private Token token = new Token();

    /*
     * (non-Javadoc)
     *
     * @see org.apache.lucene.analysis.TokenStream#next()
     */
    @Override
    public Token next(Token reusableToken) throws IOException {
      while (null != (reusableToken = input.next(token))) {
        String term = reusableToken.term();
        if (term.equals("the")) {
          // stopword, do nothing
        } else if (term.equals("quick")) {
          reusableToken.setPositionIncrement(2);
          return reusableToken;
        } else {
          reusableToken.setPositionIncrement(1);
          return reusableToken;
        }
      }
      return null;
    }

    public final boolean incrementToken() throws java.io.IOException {
      while (input.incrementToken()) {
        if (termAtt.term().equals("the")) {
          // stopword, do nothing
        } else if (termAtt.term().equals("quick")) {
          posIncrAtt.setPositionIncrement(2);
          return true;
        } else {
          posIncrAtt.setPositionIncrement(1);
          return true;
        }
      }
      return false;
    }

  }

}
TOP

Related Classes of org.apache.lucene.queryParser.standard.TestMultiAnalyzerWrapper$TestPosIncrementFilter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.