/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.urlfilter.validator;
import org.apache.nutch.urlfilter.validator.UrlValidator;
import org.junit.Assert;
import org.junit.Test;
/**
* JUnit test case which tests
* 1. that valid urls are not filtered while invalid ones are filtered.
* 2. that Urls' scheme, authority, path and query are validated.
*
* @author tejasp
*
*/
public class TestUrlValidator {
/**
* Test method for {@link org.apache.nutch.urlfilter.validator.UrlValidator#filter(java.lang.String)}.
*/
@Test
public void testFilter() {
UrlValidator url_validator = new UrlValidator();
Assert.assertNotNull(url_validator);
Assert.assertNull("Filtering on a null object should return null", url_validator.filter(null));
Assert.assertNull("Invalid url: example.com/file[/].html", url_validator.filter("example.com/file[/].html"));
Assert.assertNull("Invalid url: http://www.example.com/space here.html", url_validator.filter("http://www.example.com/space here.html"));
Assert.assertNull("Invalid url: /main.html", url_validator.filter("/main.html"));
Assert.assertNull("Invalid url: www.example.com/main.html", url_validator.filter("www.example.com/main.html"));
Assert.assertNull("Invalid url: ftp:www.example.com/main.html", url_validator.filter("ftp:www.example.com/main.html"));
Assert.assertNull("Inalid url: http://999.000.456.32/nutch/trunk/README.txt",
url_validator.filter("http://999.000.456.32/nutch/trunk/README.txt"));
Assert.assertNull("Invalid url: http://www.example.com/ma|in\\toc.html", url_validator.filter(" http://www.example.com/ma|in\\toc.html"));
Assert.assertNotNull("Valid url: https://issues.apache.org/jira/NUTCH-1127", url_validator.filter("https://issues.apache.org/jira/NUTCH-1127"));
Assert.assertNotNull("Valid url: http://domain.tld/function.cgi?url=http://fonzi.com/&name=Fonzi&mood=happy&coat=leather",
url_validator.filter("http://domain.tld/function.cgi?url=http://fonzi.com/&name=Fonzi&mood=happy&coat=leather"));
Assert.assertNotNull("Valid url: http://validator.w3.org/feed/check.cgi?url=http%3A%2F%2Ffeeds.feedburner.com%2Fperishablepress",
url_validator.filter("http://validator.w3.org/feed/check.cgi?url=http%3A%2F%2Ffeeds.feedburner.com%2Fperishablepress"));
Assert.assertNotNull("Valid url: ftp://alfa.bravo.pi/foo/bar/plan.pdf", url_validator.filter("ftp://alfa.bravo.pi/mike/check/plan.pdf"));
}
}