Package us.codecraft.webmagic.pipeline

Examples of us.codecraft.webmagic.pipeline.Pipeline


public class SpiderTest {

    @Ignore("long time")
    @Test
    public void testStartAndStop() throws InterruptedException {
        Spider spider = Spider.create(new SimplePageProcessor("http://www.oschina.net/", "http://www.oschina.net/*")).addPipeline(new Pipeline() {
            @Override
            public void process(ResultItems resultItems, Task task) {
                System.out.println(1);
            }
        }).thread(1);
View Full Code Here


                .language(params.getLanguage()).scriptFromFile(params.getScriptFileName()).thread(params.getThread()).build();
        pageProcessor.getSite().setSleepTime(params.getSleepTime());
        pageProcessor.getSite().setRetryTimes(3);
        pageProcessor.getSite().setAcceptStatCode(Sets.<Integer>newHashSet(200, 404,403, 500,502));
        Spider spider = Spider.create(pageProcessor).thread(params.getThread());
        spider.clearPipeline().addPipeline(new Pipeline() {
            @Override
            public void process(ResultItems resultItems, Task task) {

            }
        });
View Full Code Here

        return Site.me().addStartUrl("https://github.com/code4craft/webmagic");
    }

    @Test
    public void test() {
        OOSpider.create(new GithubRepoProcessor()).addPipeline(new Pipeline() {
            @Override
            public void process(ResultItems resultItems, Task task) {
                Assert.assertEquals("78",((String)resultItems.get("star")).trim());
                Assert.assertEquals("65",((String)resultItems.get("fork")).trim());
            }
View Full Code Here

TOP

Related Classes of us.codecraft.webmagic.pipeline.Pipeline

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.