Package org.apache.lucene.search.suggest.analyzing

Source Code of org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggesterTest

package org.apache.lucene.search.suggest.analyzing;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.File;
import java.io.Reader;
import java.util.List;
import java.util.Locale;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.TermFreqPayload;
import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;

// Test requires postings offsets:
@SuppressCodecs({"Lucene3x","MockFixedIntBlock","MockVariableIntBlock","MockSep","MockRandom"})
public class AnalyzingInfixSuggesterTest extends LuceneTestCase {

  public void testBasic() throws Exception {
    TermFreqPayload keys[] = new TermFreqPayload[] {
      new TermFreqPayload("lend me your ear", 8, new BytesRef("foobar")),
      new TermFreqPayload("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
    };

    File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest");

    Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
        @Override
        protected Directory getDirectory(File path) {
          return newDirectory();
        }
      };
    suggester.build(new TermFreqPayloadArrayIterator(keys));

    List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("ear", random()), 10, true, true);
    assertEquals(2, results.size());
    assertEquals("a penny saved is a penny <b>ear</b>ned", results.get(0).key);
    assertEquals(10, results.get(0).value);
    assertEquals(new BytesRef("foobaz"), results.get(0).payload);

    assertEquals("lend me your <b>ear</b>", results.get(1).key);
    assertEquals(8, results.get(1).value);
    assertEquals(new BytesRef("foobar"), results.get(1).payload);

    results = suggester.lookup(_TestUtil.stringToCharSequence("ear ", random()), 10, true, true);
    assertEquals(1, results.size());
    assertEquals("lend me your <b>ear</b>", results.get(0).key);
    assertEquals(8, results.get(0).value);
    assertEquals(new BytesRef("foobar"), results.get(0).payload);

    results = suggester.lookup(_TestUtil.stringToCharSequence("pen", random()), 10, true, true);
    assertEquals(1, results.size());
    assertEquals("a <b>pen</b>ny saved is a <b>pen</b>ny earned", results.get(0).key);
    assertEquals(10, results.get(0).value);
    assertEquals(new BytesRef("foobaz"), results.get(0).payload);

    results = suggester.lookup(_TestUtil.stringToCharSequence("p", random()), 10, true, true);
    assertEquals(1, results.size());
    assertEquals("a <b>p</b>enny saved is a <b>p</b>enny earned", results.get(0).key);
    assertEquals(10, results.get(0).value);
    assertEquals(new BytesRef("foobaz"), results.get(0).payload);

    suggester.close();
  }

  public void testAfterLoad() throws Exception {
    TermFreqPayload keys[] = new TermFreqPayload[] {
      new TermFreqPayload("lend me your ear", 8, new BytesRef("foobar")),
      new TermFreqPayload("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
    };

    File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest");

    Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
        @Override
        protected Directory getDirectory(File path) {
          return newFSDirectory(path);
        }
      };
    suggester.build(new TermFreqPayloadArrayIterator(keys));
    suggester.close();

    suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
        @Override
        protected Directory getDirectory(File path) {
          return newFSDirectory(path);
        }
      };
    List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("ear", random()), 10, true, true);
    assertEquals(2, results.size());
    assertEquals("a penny saved is a penny <b>ear</b>ned", results.get(0).key);
    assertEquals(10, results.get(0).value);
    assertEquals(new BytesRef("foobaz"), results.get(0).payload);
    suggester.close();
  }

  public void testRandomMinPrefixLength() throws Exception {
    TermFreqPayload keys[] = new TermFreqPayload[] {
      new TermFreqPayload("lend me your ear", 8, new BytesRef("foobar")),
      new TermFreqPayload("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
    };

    File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest");

    Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    int minPrefixLength = random().nextInt(10);
    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, minPrefixLength) {
        @Override
        protected Directory getDirectory(File path) {
          return newFSDirectory(path);
        }
      };
    suggester.build(new TermFreqPayloadArrayIterator(keys));

    for(int i=0;i<2;i++) {
      for(int j=0;j<2;j++) {
        boolean doHighlight = j == 0;

        List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("ear", random()), 10, true, doHighlight);
        assertEquals(2, results.size());
        if (doHighlight) {
          assertEquals("a penny saved is a penny <b>ear</b>ned", results.get(0).key);
        } else {
          assertEquals("a penny saved is a penny earned", results.get(0).key);
        }
        assertEquals(10, results.get(0).value);
        if (doHighlight) {
          assertEquals("lend me your <b>ear</b>", results.get(1).key);
        } else {
          assertEquals("lend me your ear", results.get(1).key);
        }
        assertEquals(new BytesRef("foobaz"), results.get(0).payload);
        assertEquals(8, results.get(1).value);
        assertEquals(new BytesRef("foobar"), results.get(1).payload);

        results = suggester.lookup(_TestUtil.stringToCharSequence("ear ", random()), 10, true, doHighlight);
        assertEquals(1, results.size());
        if (doHighlight) {
          assertEquals("lend me your <b>ear</b>", results.get(0).key);
        } else {
          assertEquals("lend me your ear", results.get(0).key);
        }
        assertEquals(8, results.get(0).value);
        assertEquals(new BytesRef("foobar"), results.get(0).payload);

        results = suggester.lookup(_TestUtil.stringToCharSequence("pen", random()), 10, true, doHighlight);
        assertEquals(1, results.size());
        if (doHighlight) {
          assertEquals("a <b>pen</b>ny saved is a <b>pen</b>ny earned", results.get(0).key);
        } else {
          assertEquals("a penny saved is a penny earned", results.get(0).key);
        }
        assertEquals(10, results.get(0).value);
        assertEquals(new BytesRef("foobaz"), results.get(0).payload);

        results = suggester.lookup(_TestUtil.stringToCharSequence("p", random()), 10, true, doHighlight);
        assertEquals(1, results.size());
        if (doHighlight) {
          assertEquals("a <b>p</b>enny saved is a <b>p</b>enny earned", results.get(0).key);
        } else {
          assertEquals("a penny saved is a penny earned", results.get(0).key);
        }
        assertEquals(10, results.get(0).value);
        assertEquals(new BytesRef("foobaz"), results.get(0).payload);
      }

      // Make sure things still work after close and reopen:
      suggester.close();
      suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, minPrefixLength) {
          @Override
          protected Directory getDirectory(File path) {
            return newFSDirectory(path);
          }
        };
    }
    suggester.close();
  }

  public void testHighlight() throws Exception {
    TermFreqPayload keys[] = new TermFreqPayload[] {
      new TermFreqPayload("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
    };

    File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest");

    Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
        @Override
        protected Directory getDirectory(File path) {
          return newDirectory();
        }
      };
    suggester.build(new TermFreqPayloadArrayIterator(keys));
    List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("penn", random()), 10, true, true);
    assertEquals(1, results.size());
    assertEquals("a <b>penn</b>y saved is a <b>penn</b>y earned", results.get(0).key);
    suggester.close();
  }

  public void testHighlightCaseChange() throws Exception {
    TermFreqPayload keys[] = new TermFreqPayload[] {
      new TermFreqPayload("a Penny saved is a penny earned", 10, new BytesRef("foobaz")),
    };

    File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest");

    Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true);
    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
        @Override
        protected Directory getDirectory(File path) {
          return newDirectory();
        }
      };
    suggester.build(new TermFreqPayloadArrayIterator(keys));
    List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("penn", random()), 10, true, true);
    assertEquals(1, results.size());
    assertEquals("a <b>Penny</b> saved is a <b>penn</b>y earned", results.get(0).key);
    suggester.close();

    // Try again, but overriding addPrefixMatch to normalize case:
    suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
        @Override
        protected void addPrefixMatch(StringBuilder sb, String surface, String analyzed, String prefixToken) {
          prefixToken = prefixToken.toLowerCase(Locale.ROOT);
          String surfaceLower = surface.toLowerCase(Locale.ROOT);
          sb.append("<b>");
          if (surfaceLower.startsWith(prefixToken)) {
            sb.append(surface.substring(0, prefixToken.length()));
            sb.append("</b>");
            sb.append(surface.substring(prefixToken.length()));
          } else {
            sb.append(surface);
            sb.append("</b>");
          }
        }

        @Override
        protected Directory getDirectory(File path) {
          return newDirectory();
        }
      };
    suggester.build(new TermFreqPayloadArrayIterator(keys));
    results = suggester.lookup(_TestUtil.stringToCharSequence("penn", random()), 10, true, true);
    assertEquals(1, results.size());
    assertEquals("a <b>Penn</b>y saved is a <b>penn</b>y earned", results.get(0).key);
    suggester.close();
  }

  public void testDoubleClose() throws Exception {
    TermFreqPayload keys[] = new TermFreqPayload[] {
      new TermFreqPayload("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
    };

    File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest");

    Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
        @Override
        protected Directory getDirectory(File path) {
          return newDirectory();
        }
      };
    suggester.build(new TermFreqPayloadArrayIterator(keys));
    suggester.close();
    suggester.close();
  }

  public void testForkLastToken() throws Exception {
    Analyzer a = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
          MockTokenizer tokens = new MockTokenizer(reader);
          // ForkLastTokenFilter is a bit evil:
          tokens.setEnableChecks(false);
          return new TokenStreamComponents(tokens,
                                           new StopKeywordFilter(TEST_VERSION_CURRENT,
                                                                 new ForkLastTokenFilter(tokens), StopKeywordFilter.makeStopSet(TEST_VERSION_CURRENT, "a")));
        }
      };

    TermFreqPayload keys[] = new TermFreqPayload[] {
      new TermFreqPayload("a bob for apples", 10, new BytesRef("foobaz")),
    };

    File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest");

    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
        @Override
        protected Query finishQuery(BooleanQuery in, boolean allTermsRequired) {
          List<BooleanClause> clauses = in.clauses();
          if (clauses.size() >= 2 && allTermsRequired) {
            String t1 = getTerm(clauses.get(clauses.size()-2).getQuery());
            String t2 = getTerm(clauses.get(clauses.size()-1).getQuery());
            if (t1.equals(t2)) {
              // The last 2 tokens came from
              // ForkLastTokenFilter; we remove them and
              // replace them with a MUST BooleanQuery that
              // SHOULDs the two of them together:
              BooleanQuery sub = new BooleanQuery();
              BooleanClause other = clauses.get(clauses.size()-2);
              sub.add(new BooleanClause(clauses.get(clauses.size()-2).getQuery(), BooleanClause.Occur.SHOULD));
              sub.add(new BooleanClause(clauses.get(clauses.size()-1).getQuery(), BooleanClause.Occur.SHOULD));
              clauses.subList(clauses.size()-2, clauses.size()).clear();
              clauses.add(new BooleanClause(sub, BooleanClause.Occur.MUST));
            }
          }
          return in;
        }

        private String getTerm(Query query) {
          if (query instanceof TermQuery) {
            return ((TermQuery) query).getTerm().text();
          } else if (query instanceof PrefixQuery) {
            return ((PrefixQuery) query).getPrefix().text();
          } else {
            return null;
          }
        }

        @Override
        protected Directory getDirectory(File path) {
          return newDirectory();
        }
      };

    suggester.build(new TermFreqPayloadArrayIterator(keys));
    List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("a", random()), 10, true, true);
    assertEquals(1, results.size());
    assertEquals("a bob for <b>a</b>pples", results.get(0).key);
    suggester.close();
  }
}
TOP

Related Classes of org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggesterTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.