Package de.spinscale.elasticsearch.module.suggest.test

Source Code of de.spinscale.elasticsearch.module.suggest.test.AbstractSuggestTest

package de.spinscale.elasticsearch.module.suggest.test;

import de.spinscale.elasticsearch.action.suggest.statistics.FstStats;
import de.spinscale.elasticsearch.plugin.suggest.SuggestPlugin;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.RandomStringUtils;
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsResponse;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.collect.Lists;
import org.elasticsearch.common.collect.Maps;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.test.ElasticsearchIntegrationTest;
import org.elasticsearch.test.junit.annotations.TestLogging;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
import static org.hamcrest.Matchers.*;

@TestLogging("_root:info")
@Ignore
public abstract class AbstractSuggestTest extends ElasticsearchIntegrationTest {

    public final String index = randomAsciiOfLength(10).toLowerCase(Locale.ROOT);
    public final String type = randomAsciiOfLength(10).toLowerCase(Locale.ROOT);

    @Before
    public void createIndex() throws Exception {
        createIndexWithProductsMapping(index);
    }

    abstract public List<String> getSuggestions(SuggestionQuery suggestionQuery) throws Exception;
    abstract public void refreshAllSuggesters() throws Exception;
    abstract public void refreshIndexSuggesters(String index) throws Exception;
    abstract public void refreshFieldSuggesters(String index, String field) throws Exception;
    abstract public FstStats getStatistics() throws Exception;

    @Override
    protected Settings nodeSettings(int nodeOrdinal) {
        return settingsBuilder()
                .put(super.nodeSettings(nodeOrdinal))
                .put("plugin.types", SuggestPlugin.class.getName())
                .build();
    }

    @Test
    public void testThatSimpleSuggestionWorks() throws Exception {
        List<Map<String, Object>> products = createProducts("ProductName", "foo", "foob", "foobar", "boof");
        indexProducts(products);

        List<String> suggestions = getSuggestions("ProductName.suggest", "foo", 10);
        assertSuggestions(suggestions, "foo", "foob", "foobar");
    }

    @Test
    public void testThatAllFieldSuggestionsWorks() throws Exception {
        List<Map<String, Object>> products = createProducts("ProductName", "foo", "foob", "foobar", "boof");
        indexProducts(products);

        List<String> suggestions = getSuggestions("_all", "foo", 10);
        assertSuggestions(suggestions, "foo", "foob", "foobar");
    }

    @Test
    public void testThatSimpleSuggestionShouldSupportLimit() throws Exception {
        List<Map<String, Object>> products = createProducts("ProductName", "foo", "fooba", "foobar");
        indexProducts(products);

        List<String> suggestions = getSuggestions("ProductName.suggest", "foo", 2);
        assertSuggestions(suggestions, "foo", "fooba");
    }

    @Test
    public void testThatSimpleSuggestionShouldSupportLimitWithConcreteWord() throws Exception {
        List<Map<String, Object>> products = createProducts("ProductName", "foo", "fooba", "foobar");
        indexProducts(products);

        List<String> suggestions = getSuggestions("ProductName.suggest", "foo", 2);
        assertSuggestions(suggestions, "foo", "fooba");
    }

    @Test
    public void testThatSuggestionShouldNotContainDuplicates() throws Exception {
        List<Map<String, Object>> products = createProducts("ProductName", "foo", "foo", "foob");
        indexProducts(products);

        List<String> suggestions = getSuggestions("ProductName.suggest", "foo", 10);
        assertSuggestions(suggestions, "foo", "foob");
    }

    @Test
    public void testThatSuggestionShouldWorkOnDifferentFields() throws Exception {
        List<Map<String, Object>> products = createProducts("ProductName", "Kochjacke Pute", "Kochjacke Henne", "Kochjacke Hahn");
        products.get(0).put("Description", "Kochjacke Pute");
        products.get(1).put("Description", "Kochjacke Henne");
        products.get(2).put("Description", "Kochjacke Hahn");
        indexProducts(products);

        List<String> suggestions = getSuggestions("ProductName.suggest", "kochjacke", 10);
        assertSuggestions(suggestions, "kochjacke", "kochjacke hahn", "kochjacke henne", "kochjacke pute");

        suggestions = getSuggestions("Description", "Kochjacke", 10);
        assertSuggestions(suggestions, "Kochjacke Hahn", "Kochjacke Henne", "Kochjacke Pute");
    }

    @Test
    public void testThatSuggestionShouldWorkWithWhitespaces() throws Exception {
        List<Map<String, Object>> products = createProducts("ProductName", "Kochjacke Paul", "Kochjacke Pauline",
                "Kochjacke Paulinea");
        indexProducts(products);

        List<String> suggestions = getSuggestions("ProductName.suggest", "kochja", 10);
        assertSuggestions(suggestions, "kochjacke", "kochjacke paul", "kochjacke pauline", "kochjacke paulinea");

        suggestions = getSuggestions("ProductName.suggest", "kochjacke ", 10);
        assertSuggestions(suggestions, "kochjacke paul", "kochjacke pauline", "kochjacke paulinea");

        suggestions = getSuggestions("ProductName.suggest", "kochjacke pauline", 10);
        assertSuggestions(suggestions, "kochjacke pauline", "kochjacke paulinea");
    }

    @Test
    public void testThatSuggestionWithShingleWorksAfterUpdate() throws Exception {
        List<Map<String, Object>> products = createProducts("ProductName", "Kochjacke Paul", "Kochjacke Pauline",
                "Kochjacke Paulinator");
        indexProducts(products);

        List<String> suggestions = getSuggestions("ProductName.suggest", "kochjacke", 10);
        assertSuggestions(suggestions, "kochjacke", "kochjacke paul", "kochjacke paulinator", "kochjacke pauline");

        products = createProducts(1);
        products.get(0).put("ProductName", "Kochjacke PaulinPanzer");
        indexProducts(products);
        refresh();
        refreshAllSuggesters();

        suggestions = getSuggestions("ProductName.suggest", "kochjacke paulin", 10);
        assertSuggestions(suggestions, "kochjacke paulinator", "kochjacke pauline", "kochjacke paulinpanzer");

        cleanIndex();
        refreshAllSuggesters();
        suggestions = getSuggestions("ProductName.suggest", "kochjacke paulin", 10);
        assertThat(suggestions.size(), is(0));
    }

    @Test
    public void testThatSuggestionWorksWithSimilarity() throws Exception {
        List<Map<String, Object>> products = createProducts("ProductName", "kochjacke bla", "kochjacke blubb",
                "kochjacke blibb", "kochjacke paul");
        indexProducts(products);

        List<String> suggestions = getSuggestions("ProductName.suggest", "kochajcke", 10, 0.75f);
        assertThat(suggestions, hasSize(1));
        assertThat(suggestions, contains("kochjacke"));
    }

    @Ignore("This test is useless in this setup, as it may return better/more data than expected and therefore fails")
    @Test
    public void testThatRefreshingPerIndexWorks() throws Exception {
        // having routing ensures that all the data is written into one shard
        // this ensures that when adding the second product it is added to the same shard
        // if it is not added to the same shard, refreshing might not work as expected
        // in case the data is added to a shard where there was no data in before, it is added immediately to the
        // suggestions, this means more results than expected might be returned in the last line of this test
        createIndexWithProductsMapping("secondproductsindex");

        List<Map<String, Object>> products = createProducts("ProductName", "autoreifen", "autorad");
        indexProducts(products, index, "someRoutingKey");
        indexProducts(products, "secondproductsindex", "someRoutingKey");

        // get suggestions from both indexes to create fst structures
        SuggestionQuery productsQuery = new SuggestionQuery(index, type, "ProductName.suggest", "auto");
        SuggestionQuery secondProductsIndexQuery = new SuggestionQuery("secondproductsindex", type, "ProductName.suggest", "auto");
        getSuggestions(productsQuery);
        getSuggestions(secondProductsIndexQuery);

        // index another product
        List<Map<String, Object>> newProducts = createProducts("ProductName", "automatik");
        indexProducts(newProducts, index, "someRoutingKey");
        indexProducts(newProducts, "secondproductsindex", "someRoutingKey");

        refreshIndexSuggesters(index);

        assertSuggestions(productsQuery, "automatik", "autorad", "autoreifen");
        assertSuggestions(secondProductsIndexQuery, "autorad", "autoreifen");
    }

    @Ignore("This test is useless in this setup, as it may return better/more data than expected and therefore fails")
    @Test
    public void testThatRefreshingPerIndexFieldWorks() throws Exception {
        // having routing ensures that all the data is written into one shard
        // this ensures that when adding the second product it is added to the same shard
        // if it is not added to the same shard, refreshing might not work as expected
        // in case the data is added to a shard where there was no data in before, it is added immediately to the
        // suggestions, this means more results than expected might be returned in the last line of this test
        List<Map<String, Object>> products = createProducts("ProductName", "autoreifen", "autorad");
        indexProducts(products, index, "someRoutingKey");

        SuggestionQuery suggestionQuery = new SuggestionQuery(index, type, "ProductName.suggest", "auto");
        SuggestionQuery lowerCaseQuery = new SuggestionQuery(index, type, "ProductName.lowercase", "auto");
        getSuggestions(suggestionQuery);
        getSuggestions(lowerCaseQuery);

        List<Map<String, Object>> newProducts = createProducts("ProductName", "automatik");
        indexProducts(newProducts, index, "someRoutingKey");

        refreshFieldSuggesters(index, "ProductName.suggest");

        assertSuggestions(suggestionQuery, "automatik", "autorad", "autoreifen");
        assertSuggestions(lowerCaseQuery, "autorad", "autoreifen");
    }

    @Test
    public void testThatAnalyzingSuggesterWorks() throws Exception {
        List<Map<String, Object>> products = createProducts("ProductName", "BMW 318", "BMW 528", "BMW M3",
                "the BMW 320", "VW Jetta");
        indexProducts(products);

        SuggestionQuery query = new SuggestionQuery(index, type, "ProductName.keyword", "b")
                .suggestType("full").analyzer("simple").size(10);
        List<String> suggestions = getSuggestions(query);

        assertSuggestions(suggestions, "BMW 318", "BMW 528", "BMW M3");
    }

    @Test
    public void testThatAnalyzingSuggesterSupportsStopWords() throws Exception {
        List<Map<String, Object>> products = createProducts("ProductName", "BMW 318", "BMW 528", "BMW M3",
                "the BMW 320", "VW Jetta");
        indexProducts(products);

        SuggestionQuery query = new SuggestionQuery(index, type, "ProductName.keyword", "b")
                .suggestType("full").indexAnalyzer("stop").queryAnalyzer("stop")
                .preservePositionIncrements(false).size(10);
        List<String> suggestions = getSuggestions(query);

        assertSuggestions(suggestions, "BMW 318", "BMW 528", "BMW M3", "the BMW 320");
    }

    @Test
    public void testThatFuzzySuggesterWorks() throws Exception {
        List<Map<String, Object>> products = createProducts("ProductName", "BMW 318", "BMW 528", "BMW M3",
                "the BMW 320", "VW Jetta");
        indexProducts(products);

        SuggestionQuery query = new SuggestionQuery(index, type, "ProductName.keyword", "bwm")
                .suggestType("fuzzy").analyzer("standard").size(10);
        List<String> suggestions = getSuggestions(query);

        assertSuggestions(suggestions, "BMW 318", "BMW 528", "BMW M3");
    }

    @Test
    public void testThatFlushForcesReloadingOfAllFieldsWithoutErrors() throws Exception {
        List<Map<String, Object>> products = createProducts("ProductName", "BMW 318");
        indexProducts(products);

        SuggestionQuery query = new SuggestionQuery(index, type, "ProductName.keyword", "bwm").suggestType("full");
        getSuggestions(query);

        // add data to index and flush
        // indexProducts(createProducts("ProductName", "BMW 320"), node);
        client().prepareIndex(index, type, "foo").setSource(createProducts(1).get(0)).execute().actionGet();
        client().admin().indices().prepareFlush(index).execute().actionGet();
        getSuggestions(query);
    }

    @Test
    public void gettingStatisticsShouldWork() throws Exception {
        // needed to make sure that we hit the already queried shards for stats, the other are empty
        Settings settings = settingsBuilder()
                .put("index.number_of_replicas", 0)
                .build();
        UpdateSettingsResponse response = client().admin().indices().prepareUpdateSettings(index).setSettings(settings).get();
        assertThat(response.isAcknowledged(), is(true));

        List<Map<String, Object>> products = createProducts("ProductName",
                "BMW 318", "BMW 528", "BMW M3", "the BMW 320", "VW Jetta");
        indexProducts(products);

        FstStats emptyFstStats = getStatistics();
        assertThat(emptyFstStats.getStats(), hasSize(0));
        assertThat(getFstSizeSum(emptyFstStats), equalTo(0L));

        SuggestionQuery query = new SuggestionQuery(index, type, "ProductName.keyword", "b")
                .suggestType("full").analyzer("stop").size(10);
        List<String> suggestions = getSuggestions(query);
        assertSuggestions(suggestions, "BMW 318", "BMW 528", "BMW M3", "the BMW 320");

        FstStats filledFstStats = getStatistics();
        assertThat(filledFstStats.getStats(), hasSize(greaterThanOrEqualTo(1)));

        List<FstStats.FstIndexShardStats> allStats = Lists.newArrayList(filledFstStats.getStats());
        assertThat(allStats.get(0).getShardId().id(), greaterThanOrEqualTo(0));
        assertThat(getFstSizeSum(filledFstStats), greaterThan(0L));
    }

    private long getFstSizeSum(FstStats fstStats) {
        long totalFstSize = 0;

        for (FstStats.FstIndexShardStats stats : fstStats.getStats()) {
            totalFstSize += stats.getSizeInBytes();
        }

        return totalFstSize;
    }

//    @Test
//    public void performanceTest() throws Exception {
//        List<Map<String, Object>> products = createProducts(60000);
//        indexProducts(products);
//
//        System.out.println(measureSuggestTime("a"));
//        System.out.println(measureSuggestTime("aa"));
//        System.out.println(measureSuggestTime("aaa"));
//        System.out.println(measureSuggestTime("aaab"));
//        System.out.println(measureSuggestTime("aaabc"));
//        System.out.println(measureSuggestTime("aaabcd"));
//    }
//
//    private long measureSuggestTime(String search) throws Exception {
//        long start = System.currentTimeMillis();
//        getSuggestions("ProductName.suggest", "aaa", 10);
//        long end = System.currentTimeMillis();
//
//        return end - start;
//    }

    private List<String> getSuggestions(String field, String term, Integer size) throws Exception {
        return getSuggestions(new SuggestionQuery(index, type, field, term).size(size));
    }

    private List<String> getSuggestions(String field, String term, Integer size, Float similarity) throws Exception {
        return getSuggestions(new SuggestionQuery(index, type, field, term).size(size).similarity(similarity));
    }

    private void assertSuggestions(List<String> suggestions, String ... terms) {
        String msg = String.format(Locale.ROOT, "%s should have size %s, content %s", suggestions, terms.length, Arrays.asList(terms));
        assertThat(msg, suggestions, hasSize(terms.length));
        assertThat("Suggestions are: " + suggestions, suggestions, contains(terms));
    }

    private void assertSuggestions(SuggestionQuery query, String ... terms) throws Exception {
        List<String> suggestions = getSuggestions(query);
        String assertionError = String.format(Locale.ROOT, "%s for query %s should be %s", suggestions, query, Arrays.asList(terms));
        assertThat(assertionError, suggestions, hasSize(terms.length));
        assertThat("Suggestions are: " + suggestions, suggestions, contains(terms));
    }

    private void cleanIndex() {
        client().prepareDeleteByQuery(index).setTypes("product").setQuery(matchAllQuery()).get();
    }


    private void createIndexWithProductsMapping(String indexName) throws IOException {
        String settingsData = IOUtils.toString(this.getClass().getResourceAsStream("/product.json"));
        CreateIndexResponse createIndexResponse = client().admin().indices().prepareCreate(indexName)
                .setSource(settingsData).execute().actionGet();
        assertThat(createIndexResponse.isAcknowledged(), is(true));

        client().admin().cluster().prepareHealth(indexName).setWaitForGreenStatus().execute().actionGet();
    }

    protected void indexProducts(List<Map<String, Object>> products) throws Exception {
        indexProducts(products, index);
    }

    private void indexProducts(List<Map<String, Object>> products, String index) throws Exception {
        indexProducts(products, index, null);
    }

    private void indexProducts(List<Map<String, Object>> products, String index, String routing) throws Exception {
        long currentCount = getCurrentDocumentCount(index);
        BulkRequest bulkRequest = new BulkRequest();
        for (Map<String, Object> product : products) {
            IndexRequest indexRequest = new IndexRequest(index, "product", (String)product.get("ProductId"));
            indexRequest.source(product);
            if (Strings.hasLength(routing)) {
                indexRequest.routing(routing);
            }
            bulkRequest.add(indexRequest);
        }
        bulkRequest.refresh(true);
        BulkResponse response = client().bulk(bulkRequest).actionGet();
        if (response.hasFailures()) {
            fail("Error in creating products: " + response.buildFailureMessage());
        }

        assertDocumentCountAfterIndexing(index, products.size() + currentCount);
    }

    protected List<Map<String, Object>> createProducts(int count) {
        List<Map<String, Object>> products = Lists.newArrayList();

        for (int i = 0 ; i < count; i++) {
            Map<String, Object> product = Maps.newHashMap();
            product.put("ProductName", RandomStringUtils.randomAlphabetic(10));
            product.put("ProductId", i + "_" + RandomStringUtils.randomAlphabetic(10));
            products.add(product);
        }

        return products;
    }

    private List<Map<String, Object>> createProducts(String fieldName, String ... fields) {
        List<Map<String, Object>> products = createProducts(fields.length);

        for (int i = 0 ; i < fields.length ; i++) {
            products.get(i).put(fieldName, fields[i]);
        }

        return products;
    }

    private void assertDocumentCountAfterIndexing(String index, long expectedDocumentCount) throws Exception {
        assertThat(getCurrentDocumentCount(index), is(expectedDocumentCount));
    }

    private long getCurrentDocumentCount(String index) {
        return client().prepareCount(index).setQuery(matchAllQuery()).execute().actionGet(2000).getCount();
    }

}
TOP

Related Classes of de.spinscale.elasticsearch.module.suggest.test.AbstractSuggestTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.