Package org.codelibs.elasticsearch.web.robot.transformer

Source Code of org.codelibs.elasticsearch.web.robot.transformer.ScrapingTransformerTest

package org.codelibs.elasticsearch.web.robot.transformer;

import static org.hamcrest.core.Is.is;
import static org.junit.Assert.assertThat;

import java.io.InputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.io.IOUtils;
import org.codelibs.elasticsearch.web.config.RiverConfig;
import org.codelibs.robot.entity.ResponseData;
import org.codelibs.robot.entity.ResultData;
import org.junit.Test;
import org.seasar.framework.util.ResourceUtil;

public class ScrapingTransformerTest {
    @Test
    public void fess_codelibs_org() {
        RiverConfig riverConfig = new RiverConfig();
        ScrapingTransformer transformer = new ScrapingTransformer() {
            @SuppressWarnings("unchecked")
            @Override
            protected void storeIndex(ResponseData responseData,
                    Map<String, Object> dataMap) {
                System.out.println(dataMap);
                assertThat(
                        ((List<String>) ((Map<String, Object>) dataMap.get("nav"))
                                .get("sideMenus")).size(), is(27));
                assertThat(
                        ((Map<String, Object>) dataMap.get("section1")).get(
                                "title").toString(), is("What is Fess?"));
                assertThat(
                        ((List<String>) ((Map<String, Object>) dataMap.get("section1"))
                                .get("body")).size(), is(2));
                assertThat(
                        ((Map<String, Object>) dataMap.get("section2")).get(
                                "title").toString(), is("Features"));
                assertThat(
                        ((List<String>) ((Map<String, Object>) dataMap.get("section2"))
                                .get("body")).size(), is(12));
            }
        };
        transformer.riverConfig = riverConfig;

        String sessionId = "test";
        String url = "http://fess.codelibs.org/";

        Map<String, Map<String, Object>> scrapingRuleMap = new HashMap<String, Map<String, Object>>();
        addScrapingRuleMap(scrapingRuleMap, "text", "nav.sideMenus",
                "div.sidebar-nav ul li", Boolean.TRUE, Boolean.TRUE);
        addScrapingRuleMap(scrapingRuleMap, "text", "section1.title",
                "div.section:eq(0) h2", null, null);
        addScrapingRuleMap(scrapingRuleMap, "text", "section1.body",
                "div.section:eq(0) p", Boolean.TRUE, Boolean.TRUE);
        addScrapingRuleMap(scrapingRuleMap, "text", "section2.title",
                "div.section:eq(1) h2", null, null);
        addScrapingRuleMap(scrapingRuleMap, "text", "section2.body",
                "div.section:eq(1) ul li", Boolean.TRUE, Boolean.TRUE);
        Map<String, Object> patternMap = new HashMap<String, Object>();
        patternMap.put("url", url);
        riverConfig.addScrapingRule(sessionId,null, patternMap, scrapingRuleMap);
        InputStream is = null;
        try {
            is = ResourceUtil
                    .getResourceAsStream("html/fess_codelibs_org.html");
            ResponseData responseData = new ResponseData();
            responseData.setSessionId(sessionId);
            responseData.setUrl(url);
            responseData.setResponseBody(is);
            responseData.setCharSet("UTF-8");
            ResultData resultData = new ResultData();

            transformer.storeData(responseData, resultData);
        } finally {
            IOUtils.closeQuietly(is);
        }
    }

    private void addScrapingRuleMap(
            Map<String, Map<String, Object>> scrapingRuleMap, String type,
            String property, String path, Boolean isArray, Boolean trimSpaces) {
        Map<String, Object> valueMap = new HashMap<String, Object>();
        valueMap.put(type, path);
        if (isArray != null) {
            valueMap.put("isArray", isArray);
        }
        if (trimSpaces != null) {
            valueMap.put("trimSpaces", trimSpaces);
        }
        scrapingRuleMap.put(property, valueMap);
    }
}
TOP

Related Classes of org.codelibs.elasticsearch.web.robot.transformer.ScrapingTransformerTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.