Package org.apache.nutch.urlfilter.validator

Examples of org.apache.nutch.urlfilter.validator.UrlValidator.filter()


    assertNull("Invalid url: http://www.example.com/space here.html", url_validator.filter("http://www.example.com/space here.html"));
    assertNull("Invalid url: /main.html", url_validator.filter("/main.html"));
    assertNull("Invalid url: www.example.com/main.html", url_validator.filter("www.example.com/main.html"));
    assertNull("Invalid url: ftp:www.example.com/main.html", url_validator.filter("ftp:www.example.com/main.html"));
    assertNull("Inalid url: http://999.000.456.32/nutch/trunk/README.txt",
        url_validator.filter("http://999.000.456.32/nutch/trunk/README.txt"));
    assertNull("Invalid url: http://www.example.com/ma|in\\toc.html", url_validator.filter(" http://www.example.com/ma|in\\toc.html"));

    assertNotNull("Valid url: https://issues.apache.org/jira/NUTCH-1127", url_validator.filter("https://issues.apache.org/jira/NUTCH-1127"));
    assertNotNull("Valid url: http://domain.tld/function.cgi?url=http://fonzi.com/&name=Fonzi&mood=happy&coat=leather",
        url_validator.filter("http://domain.tld/function.cgi?url=http://fonzi.com/&name=Fonzi&mood=happy&coat=leather"));
View Full Code Here


    assertNull("Invalid url: /main.html", url_validator.filter("/main.html"));
    assertNull("Invalid url: www.example.com/main.html", url_validator.filter("www.example.com/main.html"));
    assertNull("Invalid url: ftp:www.example.com/main.html", url_validator.filter("ftp:www.example.com/main.html"));
    assertNull("Inalid url: http://999.000.456.32/nutch/trunk/README.txt",
        url_validator.filter("http://999.000.456.32/nutch/trunk/README.txt"));
    assertNull("Invalid url: http://www.example.com/ma|in\\toc.html", url_validator.filter(" http://www.example.com/ma|in\\toc.html"));

    assertNotNull("Valid url: https://issues.apache.org/jira/NUTCH-1127", url_validator.filter("https://issues.apache.org/jira/NUTCH-1127"));
    assertNotNull("Valid url: http://domain.tld/function.cgi?url=http://fonzi.com/&name=Fonzi&mood=happy&coat=leather",
        url_validator.filter("http://domain.tld/function.cgi?url=http://fonzi.com/&name=Fonzi&mood=happy&coat=leather"));
    assertNotNull("Valid url: http://validator.w3.org/feed/check.cgi?url=http%3A%2F%2Ffeeds.feedburner.com%2Fperishablepress",
View Full Code Here

    assertNull("Invalid url: ftp:www.example.com/main.html", url_validator.filter("ftp:www.example.com/main.html"));
    assertNull("Inalid url: http://999.000.456.32/nutch/trunk/README.txt",
        url_validator.filter("http://999.000.456.32/nutch/trunk/README.txt"));
    assertNull("Invalid url: http://www.example.com/ma|in\\toc.html", url_validator.filter(" http://www.example.com/ma|in\\toc.html"));

    assertNotNull("Valid url: https://issues.apache.org/jira/NUTCH-1127", url_validator.filter("https://issues.apache.org/jira/NUTCH-1127"));
    assertNotNull("Valid url: http://domain.tld/function.cgi?url=http://fonzi.com/&name=Fonzi&mood=happy&coat=leather",
        url_validator.filter("http://domain.tld/function.cgi?url=http://fonzi.com/&name=Fonzi&mood=happy&coat=leather"));
    assertNotNull("Valid url: http://validator.w3.org/feed/check.cgi?url=http%3A%2F%2Ffeeds.feedburner.com%2Fperishablepress",
        url_validator.filter("http://validator.w3.org/feed/check.cgi?url=http%3A%2F%2Ffeeds.feedburner.com%2Fperishablepress"));
    assertNotNull("Valid url: ftp://alfa.bravo.pi/foo/bar/plan.pdf", url_validator.filter("ftp://alfa.bravo.pi/mike/check/plan.pdf"));
View Full Code Here

        url_validator.filter("http://999.000.456.32/nutch/trunk/README.txt"));
    assertNull("Invalid url: http://www.example.com/ma|in\\toc.html", url_validator.filter(" http://www.example.com/ma|in\\toc.html"));

    assertNotNull("Valid url: https://issues.apache.org/jira/NUTCH-1127", url_validator.filter("https://issues.apache.org/jira/NUTCH-1127"));
    assertNotNull("Valid url: http://domain.tld/function.cgi?url=http://fonzi.com/&name=Fonzi&mood=happy&coat=leather",
        url_validator.filter("http://domain.tld/function.cgi?url=http://fonzi.com/&name=Fonzi&mood=happy&coat=leather"));
    assertNotNull("Valid url: http://validator.w3.org/feed/check.cgi?url=http%3A%2F%2Ffeeds.feedburner.com%2Fperishablepress",
        url_validator.filter("http://validator.w3.org/feed/check.cgi?url=http%3A%2F%2Ffeeds.feedburner.com%2Fperishablepress"));
    assertNotNull("Valid url: ftp://alfa.bravo.pi/foo/bar/plan.pdf", url_validator.filter("ftp://alfa.bravo.pi/mike/check/plan.pdf"));

  }
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.