Package org.archive.modules

Examples of org.archive.modules.CrawlURI



    protected CrawlURI makeCrawlURI(String uri) throws URIException,
            IOException {
        UURI uuri = UURIFactory.getInstance(uri);
        CrawlURI curi = new CrawlURI(uuri);
        curi.setSeed(true);
        curi.setRecorder(getRecorder());
        return curi;
    }
View Full Code Here


     * @param policy
     * @param rtxt
     * @throws IOException
     */
    public void evalQueryString(RobotsPolicy policy, Robotstxt rtxt) throws IOException {
        CrawlURI qs = new CrawlURI(UURIFactory.getInstance("http://example.com/ok?butno=something"));

        assertFalse("ignoring query-string", policy.allows("Mozilla allowbot2 99.9", qs, rtxt));
       
    }
View Full Code Here

    public void testNoVia() throws Exception {
        ViaSurtPrefixedDecideRule dr = new ViaSurtPrefixedDecideRule();
        List<String> surtPrefixes = new ArrayList<String>();
        surtPrefixes.add("http://(org,archive,");
        dr.setSurtPrefixes(surtPrefixes);
        CrawlURI testUri = createTestUri("http://example.com");
       
        assertFalse(dr.evaluate(testUri));
    }
View Full Code Here

    }
    public void testNoSurts() throws Exception {
        ViaSurtPrefixedDecideRule dr = new ViaSurtPrefixedDecideRule();
        List<String> surtPrefixes = new ArrayList<String>();
        dr.setSurtPrefixes(surtPrefixes);
        CrawlURI testUri = createTestUri("http://example.com");

        assertFalse(dr.evaluate(testUri));
    }
View Full Code Here

        assertFalse(dr.evaluate(testUri));
    }
    public void testNullSurts() throws Exception {
        ViaSurtPrefixedDecideRule dr = new ViaSurtPrefixedDecideRule();
        dr.setSurtPrefixes(null);
        CrawlURI testUri = createTestUri("http://example.com");

        assertFalse(dr.evaluate(testUri));
    }
View Full Code Here

    public void testPositiveSingleSurt() throws Exception {
        ViaSurtPrefixedDecideRule dr = new ViaSurtPrefixedDecideRule();
        List<String> surtPrefixes = new ArrayList<String>();
        surtPrefixes.add("http://(org,archive,");
        dr.setSurtPrefixes(surtPrefixes);
        CrawlURI testUri = createTestUri("http://example.com","http://archive.org");

        assertTrue(dr.evaluate(testUri));
    }
View Full Code Here

    public void testNegativeSingleSurt() throws Exception {
        ViaSurtPrefixedDecideRule dr = new ViaSurtPrefixedDecideRule();
        List<String> surtPrefixes = new ArrayList<String>();
        surtPrefixes.add("http://(org,archive,");
        dr.setSurtPrefixes(surtPrefixes);
        CrawlURI testUri = createTestUri("http://example.com","http://google.com");

        assertFalse(dr.evaluate(testUri));
    }
View Full Code Here

        List<String> surtPrefixes = new ArrayList<String>();
        surtPrefixes.add("http://(org,archive,");
        surtPrefixes.add("http://(com,test,");
        surtPrefixes.add("http://(com,google,");
        dr.setSurtPrefixes(surtPrefixes);
        CrawlURI testUri = createTestUri("http://example.com","http://google.com");

        assertTrue(dr.evaluate(testUri));
    }
View Full Code Here

        List<String> surtPrefixes = new ArrayList<String>();
        surtPrefixes.add("http://(org,archive,");
        surtPrefixes.add("http://(com,google,");
        surtPrefixes.add("http://(com,test,");
        dr.setSurtPrefixes(surtPrefixes);
        CrawlURI testUri = createTestUri("http://example.com","http://google.com");

        assertTrue(dr.evaluate(testUri));
    }
View Full Code Here

        List<String> surtPrefixes = new ArrayList<String>();
        surtPrefixes.add("http://(org,archive,");
        surtPrefixes.add("http://(com,test,");
        surtPrefixes.add("http://(com,google,");
        dr.setSurtPrefixes(surtPrefixes);
        CrawlURI testUri = createTestUri("http://example.com","http://negativeexample.com");

        assertFalse(dr.evaluate(testUri));
    }
View Full Code Here

TOP

Related Classes of org.archive.modules.CrawlURI

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.