Package com.googlecode.flaxcrawler

Examples of com.googlecode.flaxcrawler.CrawlerConfiguration


    @Ignore
    public void testCrawlerController() throws MalformedURLException, CrawlerException, FileNotFoundException {

        out = new FileOutputStream(output, true);

        CrawlerConfiguration crawlerConfiguration = new CrawlerConfiguration();
        crawlerConfiguration.addCrawler(new TestCrawler());
        crawlerConfiguration.addCrawler(new TestCrawler());
        crawlerConfiguration.addCrawler(new TestCrawler());
        crawlerConfiguration.addCrawler(new TestCrawler());
        crawlerConfiguration.addCrawler(new TestCrawler());
        crawlerConfiguration.addCrawler(new TestCrawler());
        crawlerConfiguration.addCrawler(new TestCrawler());
        crawlerConfiguration.addCrawler(new TestCrawler());
        crawlerConfiguration.setPolitenessPeriod(1000);
        crawlerConfiguration.setMaxParallelRequests(1);
        CrawlerController crawlerController = new CrawlerController(crawlerConfiguration);
        crawlerController.addSeed(new URL("http://lenta.ru/"));
        crawlerController.start();
        crawlerController.join();
    }
View Full Code Here


        // Setting up downloader controller (the same instance is shared by all crawlers below)
        DefaultDownloaderController downloaderController = new DefaultDownloaderController();
        // Setting up parser controller (also shared by all crawlers below)
        DefaultParserController parserController = new DefaultParserController();

        // Creating crawler configuration object
        CrawlerConfiguration configuration = new CrawlerConfiguration();

        // Creating five crawlers (to work with 5 threads)
        for (int i = 0; i < 5; i++) {
            // Creating a crawler and setting its downloader and parser controllers
            DefaultCrawler crawler = new ExampleCrawler();
            crawler.setDownloaderController(downloaderController);
            crawler.setParserController(parserController);
            // Adding the crawler to the configuration object
            configuration.addCrawler(crawler);
        }

        // Setting the limit on maximum parallel requests to a single site
        configuration.setMaxParallelRequests(1);
        // Setting HTTP error limits. If a limit is violated for any
        // site, the crawler will stop processing that site.
        // HTTP_CLIENT_TIMEOUT is status 408, HTTP_BAD_GATEWAY is status 502.
        configuration.setMaxHttpErrors(HttpURLConnection.HTTP_CLIENT_TIMEOUT, 10);
        configuration.setMaxHttpErrors(HttpURLConnection.HTTP_BAD_GATEWAY, 10);
        // Setting the period between two requests to a single site (in milliseconds)
        configuration.setPolitenessPeriod(500);

        // Initializing the crawler controller
        CrawlerController crawlerController = new CrawlerController(configuration);
        // Adding the crawler seed
        crawlerController.addSeed(new URL("http://en.wikipedia.org/"));
View Full Code Here

TOP

Related Classes of com.googlecode.flaxcrawler.CrawlerConfiguration

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.