Package bixo.fetcher.simulation

Examples of bixo.fetcher.simulation.FakeHttpFetcher


    protected void testHeadersInStatus(BasePlatform platform) throws Exception {
        Tap in = makeInputData(platform, "testHeadersInStatus", 1, 1);

        Pipe pipe = new Pipe("urlSource");
        BaseFetcher fetcher = new FakeHttpFetcher(false, 1);
        BaseScoreGenerator scorer = new FixedScoreGenerator();
        BaseRobotsParser parser = new SimpleRobotRulesParser();
        BaseFetchJobPolicy fetchJobPolicy = new DefaultFetchJobPolicy();
        FetchPipe fetchPipe = new FetchPipe(pipe, scorer, fetcher, fetcher, parser, fetchJobPolicy, 1);
       
View Full Code Here


        Payload payload = new Payload();
        payload.put("key", "value");
        Tap in = makeInputData(platform, "testPayloads", 1, 1, payload);

        Pipe pipe = new Pipe("urlSource");
        BaseFetcher fetcher = new FakeHttpFetcher(false, 10);
        BaseScoreGenerator scorer = new FixedScoreGenerator();
        BaseRobotsParser parser = new SimpleRobotRulesParser();
        BaseFetchJobPolicy fetchJobPolicy = new DefaultFetchJobPolicy();
        FetchPipe fetchPipe = new FetchPipe(pipe, scorer, fetcher, fetcher, parser, fetchJobPolicy, 1);
       
View Full Code Here

    protected void testSkippingURLsByScore(BixoPlatform platform) throws Exception {
        // Create four pages, for domain0/page0, domain0/page1, domain1/page0, domain1/page1
        Tap in = makeInputData(platform, "testSkippingURLsByScore", 2, 2);

        Pipe pipe = new Pipe("urlSource");
        BaseFetcher fetcher = new FakeHttpFetcher(false, 1);
        BaseScoreGenerator scorer = new SkippedScoreGenerator();
        BaseRobotsParser parser = new SimpleRobotRulesParser();
        BaseFetchJobPolicy fetchJobPolicy = new DefaultFetchJobPolicy();
        FetchPipe fetchPipe = new FetchPipe(pipe, scorer, fetcher, fetcher, parser, fetchJobPolicy, 1);
       
View Full Code Here

        Pipe pipe = new Pipe("urlSource");
       
        // This will force all URLs to get skipped because of the crawl end time limit.
        FetcherPolicy defaultPolicy = new FetcherPolicy();
        defaultPolicy.setCrawlEndTime(0);
        BaseFetcher fetcher = new FakeHttpFetcher(false, 1, defaultPolicy);
        BaseScoreGenerator scorer = new FixedScoreGenerator();
        BaseRobotsParser parser = new SimpleRobotRulesParser();
        BaseFetchJobPolicy fetchJobPolicy = new DefaultFetchJobPolicy(defaultPolicy);
        FetchPipe fetchPipe = new FetchPipe(pipe, scorer, fetcher, fetcher, parser, fetchJobPolicy, 1);
View Full Code Here

        Pipe pipe = new Pipe("urlSource");
       
        // This will limit us to one URL.
        final int maxUrls = 1;
        FetcherPolicy defaultPolicy = new FetcherPolicy();
        BaseFetcher fetcher = new FakeHttpFetcher(false, 1, defaultPolicy);
        BaseScoreGenerator scorer = new FixedScoreGenerator();
        BaseRobotsParser parser = new SimpleRobotRulesParser();
        BaseFetchJobPolicy fetchJobPolicy = new DefaultFetchJobPolicy(defaultPolicy.getMaxRequestsPerConnection(), maxUrls, BaseFetchJobPolicy.DEFAULT_CRAWL_DELAY);
        FetchPipe fetchPipe = new FetchPipe(pipe, scorer, fetcher, fetcher, parser, fetchJobPolicy, 1);
View Full Code Here

            Tap in = platform.makeTap(platform.makeTextScheme(), inputPath);

            Pipe importPipe = new Each("url importer", new Fields("line"), new CreateUrlFunction());

            BaseScoreGenerator scorer = new FixedScoreGenerator();
            BaseFetcher fetcher = new FakeHttpFetcher(true, 10);
            FetchPipe fetchPipe = new FetchPipe(importPipe, scorer, fetcher, 1);

            // Create the output, which is a dual file sink tap.
            String output = "build/test/RunFakeFetchPipe/dual";
            BasePath outputPath = platform.makePath(output);
View Full Code Here

TOP

Related Classes of bixo.fetcher.simulation.FakeHttpFetcher

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.