// Source code of org.elasticsearch.river.rss.integration.RssRiverAllParametersTest

/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */


package org.elasticsearch.river.rss.integration;


import org.elasticsearch.action.count.CountResponse;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.base.Predicate;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.river.rss.RssToJson;
import org.elasticsearch.test.ElasticsearchIntegrationTest;
import org.junit.After;
import org.junit.Test;


import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.UUID;
import java.util.concurrent.TimeUnit;


import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.hamcrest.CoreMatchers.nullValue;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.notNullValue;


/**
 * Test all river settings
 */
@ElasticsearchIntegrationTest.ClusterScope(scope = ElasticsearchIntegrationTest.Scope.SUITE)
public class RssRiverAllParametersTest extends ElasticsearchIntegrationTest {


    private XContentBuilder createRiver(boolean raw, String... names) throws IOException {
        XContentBuilder river = jsonBuilder().prettyPrint().startObject()
                .field("type", "rss")
                .startObject("rss");


        if (!raw) {
            river.field("raw", raw);
        }


        if (names.length > 0) {
            river.startArray("feeds");
            for (String name : names) {
                addLocalRiver(river, name);
            }
            river.endArray();
        }


        river.endObject()
                .startObject("index")
                    .field("flush_interval", "500ms")
                .endObject()
                .endObject();


        logger.info("  --> river meant to be created: {}", river.string());


        return river;
    }


    private void startRiver(final String riverName, final String lastupdate_id, XContentBuilder river) throws InterruptedException {
        logger.info("  --> starting river [{}]", riverName);
        createIndex(riverName);
        index("_river", riverName, "_meta", river);


        // We wait for some documents been processed
        waitForChange(riverName, lastupdate_id);


        // Make sure we refresh indexed docs before launching tests
        refresh();
    }


    private void waitForChange(final String riverName, final String lastupdate_id) throws InterruptedException {
        String date = "";
        GetResponse getResponse = client().prepareGet("_river", riverName, lastupdate_id)
                .setFields("rss." + lastupdate_id)
                .execute().actionGet();
        if (getResponse.isExists()) {
            date = getResponse.getField("rss." + lastupdate_id).getValue().toString();
        }


        final String finalDate = date;
        assertThat("Date should have changed " + date, awaitBusy(new Predicate<Object>() {
            @Override
            public boolean apply(Object o) {
                GetResponse getResponse = client().prepareGet("_river", riverName, lastupdate_id)
                        .setFields("rss." + lastupdate_id)
                        .execute().actionGet();
                String new_date = "";
                if (getResponse.isExists() && getResponse.getField("rss." + lastupdate_id) != null) {
                    new_date = getResponse.getField("rss." + lastupdate_id).getValue().toString();
                }
                return !finalDate.equals(new_date);
            }
        }, 10, TimeUnit.SECONDS), equalTo(true));
    }


    @After
    public void tearDown() throws Exception {
        logger.info("  --> stopping rivers");
        // We need to make sure that the _river is stopped
        cluster().wipeIndices("_river");


        // We have to wait a little because it could throw java.lang.RuntimeException
        Thread.sleep(1000);
        super.tearDown();
    }


    private File URItoFile(URL url) {
        try {
            return new File(url.toURI());
        } catch(URISyntaxException e) {
            return new File(url.getPath());
        }
    }


    private String getUrl(String dir) throws IOException {
        URL resource = RssRiverAllParametersTest.class.getResource("/elasticsearch.yml");
        File parent = URItoFile(resource).getParentFile();
        String filename = parent.getCanonicalPath() + File.separator + dir;
        File dataDir = new File(filename);
        if (!dataDir.exists()) {
            logger.error("directory [src/test/resources/{}] should be copied to [{}]", dir, filename);
            throw new RuntimeException("src/test/resources/" + dir + " doesn't seem to exist. Check your JUnit tests.");
        }


        return dataDir.toURI().toString();
    }


    private String getLastUpdatedId(String name) throws IOException {
        return "_lastupdated_" + UUID.nameUUIDFromBytes(getUrl(name + File.separator + "rss.xml").getBytes()).toString();
    }


    /**
     * Add a river definition
     * @param xcb current xcontent builder
     * @param name feed name (optional)
     */
    private void addLocalRiver(XContentBuilder xcb, String name) throws IOException {
        addRiver(xcb, getUrl(name + File.separator + "rss.xml"), name);
    }


    /**
     * Add a river definition
     * @param xcb current xcontent builder
     * @param url URL to add
     * @param name feed name (optional)
     */
    private void addRiver(XContentBuilder xcb, String url, String name) {
        try {
            xcb.startObject()
                    .field("url", url)
                    .field("update_rate", 10000);
            if (name != null) {
                xcb.field("name", name);
            }
            xcb.endObject();
        } catch (Exception e) {
            logger.error("fail to add river feed url [{}]", url);
            fail("fail to add river feed");
        }
    }


    private void existSomeDocs(final String index) throws InterruptedException {
        existSomeDocs(index, null);
    }


    private void existSomeDocs(final String index, final String source) throws InterruptedException {
        // We wait up to 5 seconds before considering a failing test
        assertThat("Some documents should exist...", awaitBusy(new Predicate<Object>() {
            @Override
            public boolean apply(Object o) {
                QueryBuilder query;
                if (source == null) {
                    query = QueryBuilders.matchAllQuery();
                } else {
                    query = QueryBuilders.queryString(source).defaultField("feedname");
                }
                CountResponse response = client().prepareCount(index)
                        .setQuery(query).execute().actionGet();
                return response.getCount() > 0;
            }
        }, 10, TimeUnit.SECONDS), equalTo(true));
    }


    /**
     * Index http://www.lemonde.fr/rss/une.xml sample
     */
    @Test
    public void test_simple_river() throws IOException, InterruptedException {
        startRiver("simple", getLastUpdatedId("lemonde"), createRiver(true, "lemonde"));


        // We wait for some documents
        existSomeDocs("simple");
    }


    /**
     * Index http://www.lemonde.fr/rss/une.xml sample
     * Index http://rss.lefigaro.fr/lefigaro/laune sample
     */
    @Test
    public void test_multiple_river() throws IOException, InterruptedException {
        startRiver("multiple", getLastUpdatedId("lemonde"), createRiver(true, "lemonde", "lefigaro"));


        // We wait for some documents
        existSomeDocs("multiple", "lemonde");
        existSomeDocs("multiple", "lefigaro");
    }


    /**
     * http://www.malwaredomains.com/wordpress/?feed=rss
     * http://www.darkreading.com/rss/all.xml
     */
    @Test
    public void test_mcapp_rivers() throws IOException, InterruptedException {
        startRiver("mcapp", getLastUpdatedId("malwaredomains"), createRiver(true, "malwaredomains", "darkreading"));


        // We wait for some documents
        existSomeDocs("mcapp", "malwaredomains");
        existSomeDocs("mcapp", "darkreading");


        SearchResponse response = client().prepareSearch("mcapp")
                .setQuery(QueryBuilders.matchQuery("description", "domains"))
                .addField(RssToJson.Rss.RAW + ".html")
                .execute().actionGet();
        assertThat(response.getHits().getTotalHits(), greaterThan(0L));
        assertThat(response.getHits().getAt(0).field(RssToJson.Rss.RAW + ".html"), notNullValue());
        assertThat(response.getHits().getAt(0).field(RssToJson.Rss.RAW + ".html").getValues(), notNullValue());
    }


    /**
     * http://www.dcrainmaker.com/feed
     * with raw: false
     */
    @Test
    public void test_raw_false() throws IOException, InterruptedException {
        startRiver("dcrainmaker", getLastUpdatedId("dcrainmaker"), createRiver(false, "dcrainmaker"));


        // We wait for some documents
        existSomeDocs("dcrainmaker", "dcrainmaker");


        SearchResponse response = client().prepareSearch("dcrainmaker")
                .setQuery(QueryBuilders.matchQuery("description", "friends"))
                .addField(RssToJson.Rss.RAW + ".html")
                .execute().actionGet();
        assertThat(response.getHits().getTotalHits(), greaterThan(0L));
        assertThat(response.getHits().getAt(0).field(RssToJson.Rss.RAW + ".html"), nullValue());
    }


    /**
     * Index http://www.feedforall.com/sample.xml sample
     * Related to issue https://github.com/dadoonet/rssriver/issues/33
     */
    @Test
    public void test_feedforall_33() throws IOException, InterruptedException {
        startRiver("feedforall", getLastUpdatedId("feedforall"), createRiver(true, "feedforall"));


        // We wait for some documents
        existSomeDocs("feedforall");
    }


    /**
     * Index http://rss.nytimes.com/services/xml/rss/nyt/InternationalHome.xml sample
     * Related to issue https://github.com/dadoonet/rssriver/issues/34
     * Create mapping for different 'content' and 'url' field
     */
    @Test
    public void test_nytimes_34() throws IOException, InterruptedException {
        startRiver("nytimes", getLastUpdatedId("nytimes"), createRiver(true, "nytimes"));


        // We wait for some documents
        existSomeDocs("nytimes");
    }
}
// Source code of org.elasticsearch.river.rss.integration.RssRiverAllParametersTest

// Related classes of org.elasticsearch.river.rss.integration.RssRiverAllParametersTest