Package org.apache.lucene.search.suggest.analyzing

Source Code of org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggesterTest

package org.apache.lucene.search.suggest.analyzing;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.File;
import java.io.Reader;
import java.util.List;
import java.util.Locale;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.TermFreqPayload;
import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;

// Test requires postings offsets:
@SuppressCodecs({"Lucene3x","MockFixedIntBlock","MockVariableIntBlock","MockSep","MockRandom"})
public class AnalyzingInfixSuggesterTest extends LuceneTestCase {

  public void testBasic() throws Exception {
    TermFreqPayload keys[] = new TermFreqPayload[] {
      new TermFreqPayload("lend me your ear", 8, new BytesRef("foobar")),
      new TermFreqPayload("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
    };

    File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest");

    Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
        @Override
        protected Directory getDirectory(File path) {
          return newDirectory();
        }
      };
    suggester.build(new TermFreqPayloadArrayIterator(keys));

    List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("ear", random()), 10, true, true);
    assertEquals(2, results.size());
    assertEquals("a penny saved is a penny <b>ear</b>ned", results.get(0).key);
    assertEquals(10, results.get(0).value);
    assertEquals(new BytesRef("foobaz"), results.get(0).payload);

    assertEquals("lend me your <b>ear</b>", results.get(1).key);
    assertEquals(8, results.get(1).value);
    assertEquals(new BytesRef("foobar"), results.get(1).payload);

    results = suggester.lookup(_TestUtil.stringToCharSequence("ear ", random()), 10, true, true);
    assertEquals(1, results.size());
    assertEquals("lend me your <b>ear</b>", results.get(0).key);
    assertEquals(8, results.get(0).value);
    assertEquals(new BytesRef("foobar"), results.get(0).payload);

    results = suggester.lookup(_TestUtil.stringToCharSequence("pen", random()), 10, true, true);
    assertEquals(1, results.size());
    assertEquals("a <b>pen</b>ny saved is a <b>pen</b>ny earned", results.get(0).key);
    assertEquals(10, results.get(0).value);
    assertEquals(new BytesRef("foobaz"), results.get(0).payload);

    results = suggester.lookup(_TestUtil.stringToCharSequence("p", random()), 10, true, true);
    assertEquals(1, results.size());
    assertEquals("a <b>p</b>enny saved is a <b>p</b>enny earned", results.get(0).key);
    assertEquals(10, results.get(0).value);
    assertEquals(new BytesRef("foobaz"), results.get(0).payload);

    suggester.close();
  }

  public void testAfterLoad() throws Exception {
    TermFreqPayload keys[] = new TermFreqPayload[] {
      new TermFreqPayload("lend me your ear", 8, new BytesRef("foobar")),
      new TermFreqPayload("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
    };

    File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest");

    Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
        @Override
        protected Directory getDirectory(File path) {
          return newFSDirectory(path);
        }
      };
    suggester.build(new TermFreqPayloadArrayIterator(keys));
    suggester.close();

    suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
        @Override
        protected Directory getDirectory(File path) {
          return newFSDirectory(path);
        }
      };
    List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("ear", random()), 10, true, true);
    assertEquals(2, results.size());
    assertEquals("a penny saved is a penny <b>ear</b>ned", results.get(0).key);
    assertEquals(10, results.get(0).value);
    assertEquals(new BytesRef("foobaz"), results.get(0).payload);
    suggester.close();
  }

  public void testRandomMinPrefixLength() throws Exception {
    TermFreqPayload keys[] = new TermFreqPayload[] {
      new TermFreqPayload("lend me your ear", 8, new BytesRef("foobar")),
      new TermFreqPayload("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
    };

    File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest");

    Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    int minPrefixLength = random().nextInt(10);
    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, minPrefixLength) {
        @Override
        protected Directory getDirectory(File path) {
          return newFSDirectory(path);
        }
      };
    suggester.build(new TermFreqPayloadArrayIterator(keys));

    for(int i=0;i<2;i++) {
      for(int j=0;j<2;j++) {
        boolean doHighlight = j == 0;

        List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("ear", random()), 10, true, doHighlight);
        assertEquals(2, results.size());
        if (doHighlight) {
          assertEquals("a penny saved is a penny <b>ear</b>ned", results.get(0).key);
        } else {
          assertEquals("a penny saved is a penny earned", results.get(0).key);
        }
        assertEquals(10, results.get(0).value);
        if (doHighlight) {
          assertEquals("lend me your <b>ear</b>", results.get(1).key);
        } else {
          assertEquals("lend me your ear", results.get(1).key);
        }
        assertEquals(new BytesRef("foobaz"), results.get(0).payload);
        assertEquals(8, results.get(1).value);
        assertEquals(new BytesRef("foobar"), results.get(1).payload);

        results = suggester.lookup(_TestUtil.stringToCharSequence("ear ", random()), 10, true, doHighlight);
        assertEquals(1, results.size());
        if (doHighlight) {
          assertEquals("lend me your <b>ear</b>", results.get(0).key);
        } else {
          assertEquals("lend me your ear", results.get(0).key);
        }
        assertEquals(8, results.get(0).value);
        assertEquals(new BytesRef("foobar"), results.get(0).payload);

        results = suggester.lookup(_TestUtil.stringToCharSequence("pen", random()), 10, true, doHighlight);
        assertEquals(1, results.size());
        if (doHighlight) {
          assertEquals("a <b>pen</b>ny saved is a <b>pen</b>ny earned", results.get(0).key);
        } else {
          assertEquals("a penny saved is a penny earned", results.get(0).key);
        }
        assertEquals(10, results.get(0).value);
        assertEquals(new BytesRef("foobaz"), results.get(0).payload);

        results = suggester.lookup(_TestUtil.stringToCharSequence("p", random()), 10, true, doHighlight);
        assertEquals(1, results.size());
        if (doHighlight) {
          assertEquals("a <b>p</b>enny saved is a <b>p</b>enny earned", results.get(0).key);
        } else {
          assertEquals("a penny saved is a penny earned", results.get(0).key);
        }
        assertEquals(10, results.get(0).value);
        assertEquals(new BytesRef("foobaz"), results.get(0).payload);
      }

      // Make sure things still work after close and reopen:
      suggester.close();
      suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, minPrefixLength) {
          @Override
          protected Directory getDirectory(File path) {
            return newFSDirectory(path);
          }
        };
    }
    suggester.close();
  }

  public void testHighlight() throws Exception {
    TermFreqPayload keys[] = new TermFreqPayload[] {
      new TermFreqPayload("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
    };

    File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest");

    Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
        @Override
        protected Directory getDirectory(File path) {
          return newDirectory();
        }
      };
    suggester.build(new TermFreqPayloadArrayIterator(keys));
    List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("penn", random()), 10, true, true);
    assertEquals(1, results.size());
    assertEquals("a <b>penn</b>y saved is a <b>penn</b>y earned", results.get(0).key);
    suggester.close();
  }

  public void testHighlightCaseChange() throws Exception {
    TermFreqPayload keys[] = new TermFreqPayload[] {
      new TermFreqPayload("a Penny saved is a penny earned", 10, new BytesRef("foobaz")),
    };

    File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest");

    Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true);
    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
        @Override
        protected Directory getDirectory(File path) {
          return newDirectory();
        }
      };
    suggester.build(new TermFreqPayloadArrayIterator(keys));
    List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("penn", random()), 10, true, true);
    assertEquals(1, results.size());
    assertEquals("a <b>Penny</b> saved is a <b>penn</b>y earned", results.get(0).key);
    suggester.close();

    // Try again, but overriding addPrefixMatch to normalize case:
    suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
        @Override
        protected void addPrefixMatch(StringBuilder sb, String surface, String analyzed, String prefixToken) {
          prefixToken = prefixToken.toLowerCase(Locale.ROOT);
          String surfaceLower = surface.toLowerCase(Locale.ROOT);
          sb.append("<b>");
          if (surfaceLower.startsWith(prefixToken)) {
            sb.append(surface.substring(0, prefixToken.length()));
            sb.append("</b>");
            sb.append(surface.substring(prefixToken.length()));
          } else {
            sb.append(surface);
            sb.append("</b>");
          }
        }

        @Override
        protected Directory getDirectory(File path) {
          return newDirectory();
        }
      };
    suggester.build(new TermFreqPayloadArrayIterator(keys));
    results = suggester.lookup(_TestUtil.stringToCharSequence("penn", random()), 10, true, true);
    assertEquals(1, results.size());
    assertEquals("a <b>Penn</b>y saved is a <b>penn</b>y earned", results.get(0).key);
    suggester.close();
  }

  public void testDoubleClose() throws Exception {
    TermFreqPayload keys[] = new TermFreqPayload[] {
      new TermFreqPayload("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
    };

    File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest");

    Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
        @Override
        protected Directory getDirectory(File path) {
          return newDirectory();
        }
      };
    suggester.build(new TermFreqPayloadArrayIterator(keys));
    suggester.close();
    suggester.close();
  }

  public void testSuggestStopFilter() throws Exception {
    final CharArraySet stopWords = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "a");
    Analyzer indexAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
          MockTokenizer tokens = new MockTokenizer(reader);
          return new TokenStreamComponents(tokens,
                                           new StopFilter(TEST_VERSION_CURRENT, tokens, stopWords));
        }
      };

    Analyzer queryAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
          MockTokenizer tokens = new MockTokenizer(reader);
          return new TokenStreamComponents(tokens,
                                           new SuggestStopFilter(tokens, stopWords));
        }
      };

    File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest");

    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, indexAnalyzer, queryAnalyzer, 3) {
        @Override
        protected Directory getDirectory(File path) {
          return newDirectory();
        }
      };

    TermFreqPayload keys[] = new TermFreqPayload[] {
      new TermFreqPayload("a bob for apples", 10, new BytesRef("foobaz")),
    };

    suggester.build(new TermFreqPayloadArrayIterator(keys));
    List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("a", random()), 10, true, true);
    assertEquals(1, results.size());
    assertEquals("a bob for <b>a</b>pples", results.get(0).key);
    suggester.close();
  }
}
TOP

Related Classes of org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggesterTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.