Package org.infinispan.query.analysis

Source Code of org.infinispan.query.analysis.SolrAnalyzerTest

package org.infinispan.query.analysis;

import junit.framework.Assert;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermQuery;
import org.hibernate.search.util.AnalyzerUtils;
import org.infinispan.configuration.cache.ConfigurationBuilder;
import org.infinispan.manager.EmbeddedCacheManager;
import org.infinispan.query.Search;
import org.infinispan.query.SearchManager;
import org.infinispan.test.SingleCacheManagerTest;
import org.infinispan.test.fwk.TestCacheManagerFactory;
import org.testng.annotations.Test;

import static junit.framework.Assert.assertEquals;

/**
* Copied and adapted from Hibernate Search
* org.hibernate.search.test.analyzer.solr.SolrAnalyzerTest
*
* @author Sanne Grinovero <sanne@hibernate.org> (C) 2012 Red Hat Inc.
* @author Emmanuel Bernard
* @author Hardy Ferentschik
*/
@Test(groups = "functional", testName = "query.analysis.SolrAnalyzerTest")
public class SolrAnalyzerTest extends SingleCacheManagerTest {

   protected EmbeddedCacheManager createCacheManager() throws Exception {
      ConfigurationBuilder cfg = getDefaultStandaloneCacheConfig(true);
      cfg
         .indexing()
            .enable()
            .indexLocalOnly(false)
            .addProperty("hibernate.search.default.directory_provider", "ram")
            .addProperty("hibernate.search.lucene_version", "LUCENE_CURRENT");
      return TestCacheManagerFactory.createCacheManager(cfg);
   }

   /**
    * Tests that the token filters applied to <code>Team</code> are successfully created and used. Refer to
    * <code>Team</code> to see the exact definitions.
    *
    * @throws Exception in case the test fails
    */
   public void testAnalyzerDef() throws Exception {
      // create the test instance
      Team team = new Team();
      team.setDescription( "This is a D\u00E0scription" ); // \u00E0 == à - ISOLatin1AccentFilterFactory should strip off the diacritic
      team.setLocation( "Atlanta" );
      team.setName( "ATL team" );

      // persist and index the test object
      cache.put("id", team);
      SearchManager searchManager = Search.getSearchManager(cache);

      // execute several searches to show that the right tokenizers were applied
      TermQuery query = new TermQuery( new Term( "description", "D\u00E0scription" ) );
      assertEquals(
            "iso latin filter should work.  � should be a now", 0, searchManager.getQuery( query ).list().size()
      );

      query = new TermQuery( new Term( "description", "is" ) );
      assertEquals(
            "stop word filter should work. is should be removed", 0, searchManager.getQuery( query ).list().size()
      );

      query = new TermQuery( new Term( "description", "dascript" ) );
      assertEquals(
            "snowball stemmer should work. 'dascription' should be stemmed to 'dascript'",
            1,
            searchManager.getQuery( query ).list().size()
      );
   }

   /**
    * Tests the analyzers defined on {@link Team}.
    *
    * @throws Exception in case the test fails.
    */
   public void testAnalyzers() throws Exception {
      SearchManager search = Search.getSearchManager(cache);

      Analyzer analyzer = search.getSearchFactory().getAnalyzer( "standard_analyzer" );
      String text = "This is just FOOBAR's";
      Token[] tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "name", text );
      assertTokensEqual( tokens, new String[] { "This", "is", "just", "FOOBAR's" } );

      analyzer = search.getSearchFactory().getAnalyzer( "html_standard_analyzer" );
      text = "This is <b>foo</b><i>bar's</i>";
      tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "name", text );
      assertTokensEqual( tokens, new String[] { "This", "is", "foobar's" } );

      analyzer = search.getSearchFactory().getAnalyzer( "html_whitespace_analyzer" );
      text = "This is <b>foo</b><i>bar's</i>";
      tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "name", text );
      assertTokensEqual( tokens, new String[] { "This", "is", "foobar's" } );

      analyzer = search.getSearchFactory().getAnalyzer( "length_analyzer" );
      text = "ab abc abcd abcde abcdef";
      tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "name", text );
      assertTokensEqual( tokens, new String[] { "abc", "abcd", "abcde" } );

      analyzer = search.getSearchFactory().getAnalyzer( "length_analyzer" );
      text = "ab abc abcd abcde abcdef";
      tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "name", text );
      assertTokensEqual( tokens, new String[] { "abc", "abcd", "abcde" } );

      analyzer = search.getSearchFactory().getAnalyzer( "porter_analyzer" );
      text = "bikes bikes biking";
      tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "name", text );
      assertTokensEqual( tokens, new String[] { "bike", "bike", "bike" } );

      analyzer = search.getSearchFactory().getAnalyzer( "word_analyzer" );
      text = "CamelCase";
      tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "name", text );
      assertTokensEqual( tokens, new String[] { "Camel", "Case" } );

      analyzer = search.getSearchFactory().getAnalyzer( "synonym_analyzer" );
      text = "ipod cosmos";
      tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "name", text );
      assertTokensEqual( tokens, new String[] { "ipod", "i-pod", "universe", "cosmos" } );

      analyzer = search.getSearchFactory().getAnalyzer( "shingle_analyzer" );
      text = "please divide this sentence into shingles";
      tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "name", text );
      assertTokensEqual(
            tokens,
            new String[] {
                  "please",
                  "please divide",
                  "divide",
                  "divide this",
                  "this",
                  "this sentence",
                  "sentence",
                  "sentence into",
                  "into",
                  "into shingles",
                  "shingles"
            }
      );

      analyzer = search.getSearchFactory().getAnalyzer( "pattern_analyzer" );
      text = "foo,bar";
      tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "name", text );
      assertTokensEqual( tokens, new String[] { "foo", "bar" } );

      // CharStreamFactories test
      analyzer = search.getSearchFactory().getAnalyzer( "mapping_char_analyzer" );
      text = "CORA\u00C7\u00C3O DE MEL\u00C3O";
      tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "name", text );
      assertTokensEqual( tokens, new String[] { "CORACAO", "DE", "MELAO" } );
   }

   protected Class<?>[] getAnnotatedClasses() {
      return new Class[] {
            Team.class
      };
   }

   private static void assertTokensEqual(Token[] tokens, String[] strings) {
      Assert.assertEquals( strings.length, tokens.length );

      for ( int i = 0; i < tokens.length; i++ ) {
         Assert.assertEquals( "index " + i, strings[i], AnalyzerUtils.getTermText( tokens[i] ) );
      }
   }

}
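
The Team entity referenced by the Javadoc above is not reproduced on this page. As a rough, hypothetical sketch only (the field names match the test, but the analyzer list and filter choices shown here are assumptions rather than the real class), the definition exercised by testAnalyzerDef could be declared with Hibernate Search 4.x annotations and Solr filter factories roughly like this:

package org.infinispan.query.analysis;

import org.apache.solr.analysis.ISOLatin1AccentFilterFactory;
import org.apache.solr.analysis.LowerCaseFilterFactory;
import org.apache.solr.analysis.SnowballPorterFilterFactory;
import org.apache.solr.analysis.StandardTokenizerFactory;
import org.apache.solr.analysis.StopFilterFactory;
import org.hibernate.search.annotations.Analyzer;
import org.hibernate.search.annotations.AnalyzerDef;
import org.hibernate.search.annotations.Field;
import org.hibernate.search.annotations.Indexed;
import org.hibernate.search.annotations.ProvidedId;
import org.hibernate.search.annotations.TokenFilterDef;
import org.hibernate.search.annotations.TokenizerDef;

/**
 * Hypothetical sketch of the Team entity used by SolrAnalyzerTest. The real
 * class defines many more named analyzers (standard_analyzer,
 * html_standard_analyzer, length_analyzer, porter_analyzer, synonym_analyzer,
 * shingle_analyzer, ...); only the analyzer applied to the description field
 * is sketched here, with illustrative filter choices.
 */
@ProvidedId // Infinispan supplies the document id from the cache key
@Indexed
@AnalyzerDef(name = "customanalyzer",
      tokenizer = @TokenizerDef(factory = StandardTokenizerFactory.class),
      filters = {
            @TokenFilterDef(factory = ISOLatin1AccentFilterFactory.class), // strips diacritics, e.g. \u00E0 -> a
            @TokenFilterDef(factory = LowerCaseFilterFactory.class),
            @TokenFilterDef(factory = StopFilterFactory.class),            // drops stop words such as "is"
            @TokenFilterDef(factory = SnowballPorterFilterFactory.class)   // stems "dascription" -> "dascript"
      })
public class Team {

   @Field
   private String name;

   @Field
   private String location;

   @Field(analyzer = @Analyzer(definition = "customanalyzer"))
   private String description;

   public String getName() { return name; }
   public void setName(String name) { this.name = name; }

   public String getLocation() { return location; }
   public void setLocation(String location) { this.location = location; }

   public String getDescription() { return description; }
   public void setDescription(String description) { this.description = description; }
}

Named @AnalyzerDef definitions are registered globally with the search factory, which is why testAnalyzers can retrieve each one by name through search.getSearchFactory().getAnalyzer(...) regardless of which field it is attached to.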