Package org.apache.nutch.urlfilter.validator

Examples of org.apache.nutch.urlfilter.validator.UrlValidator.filter()


  @Test
  public void testFilter() {
    UrlValidator url_validator = new UrlValidator();
    Assert.assertNotNull(url_validator);

    Assert.assertNull("Filtering on a null object should return null", url_validator.filter(null));
    Assert.assertNull("Invalid url: example.com/file[/].html", url_validator.filter("example.com/file[/].html"));
    Assert.assertNull("Invalid url: http://www.example.com/space here.html", url_validator.filter("http://www.example.com/space here.html"));
    Assert.assertNull("Invalid url: /main.html", url_validator.filter("/main.html"));
    Assert.assertNull("Invalid url: www.example.com/main.html", url_validator.filter("www.example.com/main.html"));
    Assert.assertNull("Invalid url: ftp:www.example.com/main.html", url_validator.filter("ftp:www.example.com/main.html"));
View Full Code Here


  public void testFilter() {
    UrlValidator url_validator = new UrlValidator();
    Assert.assertNotNull(url_validator);

    Assert.assertNull("Filtering on a null object should return null", url_validator.filter(null));
    Assert.assertNull("Invalid url: example.com/file[/].html", url_validator.filter("example.com/file[/].html"));
    Assert.assertNull("Invalid url: http://www.example.com/space here.html", url_validator.filter("http://www.example.com/space here.html"));
    Assert.assertNull("Invalid url: /main.html", url_validator.filter("/main.html"));
    Assert.assertNull("Invalid url: www.example.com/main.html", url_validator.filter("www.example.com/main.html"));
    Assert.assertNull("Invalid url: ftp:www.example.com/main.html", url_validator.filter("ftp:www.example.com/main.html"));
    Assert.assertNull("Inalid url: http://999.000.456.32/nutch/trunk/README.txt",
View Full Code Here

    UrlValidator url_validator = new UrlValidator();
    Assert.assertNotNull(url_validator);

    Assert.assertNull("Filtering on a null object should return null", url_validator.filter(null));
    Assert.assertNull("Invalid url: example.com/file[/].html", url_validator.filter("example.com/file[/].html"));
    Assert.assertNull("Invalid url: http://www.example.com/space here.html", url_validator.filter("http://www.example.com/space here.html"));
    Assert.assertNull("Invalid url: /main.html", url_validator.filter("/main.html"));
    Assert.assertNull("Invalid url: www.example.com/main.html", url_validator.filter("www.example.com/main.html"));
    Assert.assertNull("Invalid url: ftp:www.example.com/main.html", url_validator.filter("ftp:www.example.com/main.html"));
    Assert.assertNull("Inalid url: http://999.000.456.32/nutch/trunk/README.txt",
        url_validator.filter("http://999.000.456.32/nutch/trunk/README.txt"));
View Full Code Here

    Assert.assertNotNull(url_validator);

    Assert.assertNull("Filtering on a null object should return null", url_validator.filter(null));
    Assert.assertNull("Invalid url: example.com/file[/].html", url_validator.filter("example.com/file[/].html"));
    Assert.assertNull("Invalid url: http://www.example.com/space here.html", url_validator.filter("http://www.example.com/space here.html"));
    Assert.assertNull("Invalid url: /main.html", url_validator.filter("/main.html"));
    Assert.assertNull("Invalid url: www.example.com/main.html", url_validator.filter("www.example.com/main.html"));
    Assert.assertNull("Invalid url: ftp:www.example.com/main.html", url_validator.filter("ftp:www.example.com/main.html"));
    Assert.assertNull("Inalid url: http://999.000.456.32/nutch/trunk/README.txt",
        url_validator.filter("http://999.000.456.32/nutch/trunk/README.txt"));
    Assert.assertNull("Invalid url: http://www.example.com/ma|in\\toc.html", url_validator.filter(" http://www.example.com/ma|in\\toc.html"));
View Full Code Here

    Assert.assertNull("Filtering on a null object should return null", url_validator.filter(null));
    Assert.assertNull("Invalid url: example.com/file[/].html", url_validator.filter("example.com/file[/].html"));
    Assert.assertNull("Invalid url: http://www.example.com/space here.html", url_validator.filter("http://www.example.com/space here.html"));
    Assert.assertNull("Invalid url: /main.html", url_validator.filter("/main.html"));
    Assert.assertNull("Invalid url: www.example.com/main.html", url_validator.filter("www.example.com/main.html"));
    Assert.assertNull("Invalid url: ftp:www.example.com/main.html", url_validator.filter("ftp:www.example.com/main.html"));
    Assert.assertNull("Inalid url: http://999.000.456.32/nutch/trunk/README.txt",
        url_validator.filter("http://999.000.456.32/nutch/trunk/README.txt"));
    Assert.assertNull("Invalid url: http://www.example.com/ma|in\\toc.html", url_validator.filter(" http://www.example.com/ma|in\\toc.html"));
View Full Code Here

    Assert.assertNull("Filtering on a null object should return null", url_validator.filter(null));
    Assert.assertNull("Invalid url: example.com/file[/].html", url_validator.filter("example.com/file[/].html"));
    Assert.assertNull("Invalid url: http://www.example.com/space here.html", url_validator.filter("http://www.example.com/space here.html"));
    Assert.assertNull("Invalid url: /main.html", url_validator.filter("/main.html"));
    Assert.assertNull("Invalid url: www.example.com/main.html", url_validator.filter("www.example.com/main.html"));
    Assert.assertNull("Invalid url: ftp:www.example.com/main.html", url_validator.filter("ftp:www.example.com/main.html"));
    Assert.assertNull("Inalid url: http://999.000.456.32/nutch/trunk/README.txt",
        url_validator.filter("http://999.000.456.32/nutch/trunk/README.txt"));
    Assert.assertNull("Invalid url: http://www.example.com/ma|in\\toc.html", url_validator.filter(" http://www.example.com/ma|in\\toc.html"));

    Assert.assertNotNull("Valid url: https://issues.apache.org/jira/NUTCH-1127", url_validator.filter("https://issues.apache.org/jira/NUTCH-1127"));
View Full Code Here

    Assert.assertNull("Invalid url: http://www.example.com/space here.html", url_validator.filter("http://www.example.com/space here.html"));
    Assert.assertNull("Invalid url: /main.html", url_validator.filter("/main.html"));
    Assert.assertNull("Invalid url: www.example.com/main.html", url_validator.filter("www.example.com/main.html"));
    Assert.assertNull("Invalid url: ftp:www.example.com/main.html", url_validator.filter("ftp:www.example.com/main.html"));
    Assert.assertNull("Inalid url: http://999.000.456.32/nutch/trunk/README.txt",
        url_validator.filter("http://999.000.456.32/nutch/trunk/README.txt"));
    Assert.assertNull("Invalid url: http://www.example.com/ma|in\\toc.html", url_validator.filter(" http://www.example.com/ma|in\\toc.html"));

    Assert.assertNotNull("Valid url: https://issues.apache.org/jira/NUTCH-1127", url_validator.filter("https://issues.apache.org/jira/NUTCH-1127"));
    Assert.assertNotNull("Valid url: http://domain.tld/function.cgi?url=http://fonzi.com/&name=Fonzi&mood=happy&coat=leather",
        url_validator.filter("http://domain.tld/function.cgi?url=http://fonzi.com/&name=Fonzi&mood=happy&coat=leather"));
View Full Code Here

    Assert.assertNull("Invalid url: /main.html", url_validator.filter("/main.html"));
    Assert.assertNull("Invalid url: www.example.com/main.html", url_validator.filter("www.example.com/main.html"));
    Assert.assertNull("Invalid url: ftp:www.example.com/main.html", url_validator.filter("ftp:www.example.com/main.html"));
    Assert.assertNull("Inalid url: http://999.000.456.32/nutch/trunk/README.txt",
        url_validator.filter("http://999.000.456.32/nutch/trunk/README.txt"));
    Assert.assertNull("Invalid url: http://www.example.com/ma|in\\toc.html", url_validator.filter(" http://www.example.com/ma|in\\toc.html"));

    Assert.assertNotNull("Valid url: https://issues.apache.org/jira/NUTCH-1127", url_validator.filter("https://issues.apache.org/jira/NUTCH-1127"));
    Assert.assertNotNull("Valid url: http://domain.tld/function.cgi?url=http://fonzi.com/&name=Fonzi&mood=happy&coat=leather",
        url_validator.filter("http://domain.tld/function.cgi?url=http://fonzi.com/&name=Fonzi&mood=happy&coat=leather"));
    Assert.assertNotNull("Valid url: http://validator.w3.org/feed/check.cgi?url=http%3A%2F%2Ffeeds.feedburner.com%2Fperishablepress",
View Full Code Here

    Assert.assertNull("Invalid url: ftp:www.example.com/main.html", url_validator.filter("ftp:www.example.com/main.html"));
    Assert.assertNull("Inalid url: http://999.000.456.32/nutch/trunk/README.txt",
        url_validator.filter("http://999.000.456.32/nutch/trunk/README.txt"));
    Assert.assertNull("Invalid url: http://www.example.com/ma|in\\toc.html", url_validator.filter(" http://www.example.com/ma|in\\toc.html"));

    Assert.assertNotNull("Valid url: https://issues.apache.org/jira/NUTCH-1127", url_validator.filter("https://issues.apache.org/jira/NUTCH-1127"));
    Assert.assertNotNull("Valid url: http://domain.tld/function.cgi?url=http://fonzi.com/&name=Fonzi&mood=happy&coat=leather",
        url_validator.filter("http://domain.tld/function.cgi?url=http://fonzi.com/&name=Fonzi&mood=happy&coat=leather"));
    Assert.assertNotNull("Valid url: http://validator.w3.org/feed/check.cgi?url=http%3A%2F%2Ffeeds.feedburner.com%2Fperishablepress",
        url_validator.filter("http://validator.w3.org/feed/check.cgi?url=http%3A%2F%2Ffeeds.feedburner.com%2Fperishablepress"));
    Assert.assertNotNull("Valid url: ftp://alfa.bravo.pi/foo/bar/plan.pdf", url_validator.filter("ftp://alfa.bravo.pi/mike/check/plan.pdf"));
View Full Code Here

        url_validator.filter("http://999.000.456.32/nutch/trunk/README.txt"));
    Assert.assertNull("Invalid url: http://www.example.com/ma|in\\toc.html", url_validator.filter(" http://www.example.com/ma|in\\toc.html"));

    Assert.assertNotNull("Valid url: https://issues.apache.org/jira/NUTCH-1127", url_validator.filter("https://issues.apache.org/jira/NUTCH-1127"));
    Assert.assertNotNull("Valid url: http://domain.tld/function.cgi?url=http://fonzi.com/&name=Fonzi&mood=happy&coat=leather",
        url_validator.filter("http://domain.tld/function.cgi?url=http://fonzi.com/&name=Fonzi&mood=happy&coat=leather"));
    Assert.assertNotNull("Valid url: http://validator.w3.org/feed/check.cgi?url=http%3A%2F%2Ffeeds.feedburner.com%2Fperishablepress",
        url_validator.filter("http://validator.w3.org/feed/check.cgi?url=http%3A%2F%2Ffeeds.feedburner.com%2Fperishablepress"));
    Assert.assertNotNull("Valid url: ftp://alfa.bravo.pi/foo/bar/plan.pdf", url_validator.filter("ftp://alfa.bravo.pi/mike/check/plan.pdf"));

  }
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.