Package org.apache.nutch.urlfilter.validator

Source Code of org.apache.nutch.urlfilter.validator.TestUrlValidator

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.urlfilter.validator;

import org.apache.nutch.urlfilter.validator.UrlValidator;
import org.junit.Assert;
import org.junit.Test;

/**
* JUnit test case which tests
* 1. that valid urls are not filtered while invalid ones are filtered.
* 2. that Urls' scheme, authority, path and query are validated.
*
* @author tejasp
*
*/

public class TestUrlValidator {

  /**
   * Test method for {@link org.apache.nutch.urlfilter.validator.UrlValidator#filter(java.lang.String)}.
   */
  @Test
  public void testFilter() {
    UrlValidator url_validator = new UrlValidator();
    Assert.assertNotNull(url_validator);

    Assert.assertNull("Filtering on a null object should return null", url_validator.filter(null));
    Assert.assertNull("Invalid url: example.com/file[/].html", url_validator.filter("example.com/file[/].html"));
    Assert.assertNull("Invalid url: http://www.example.com/space here.html", url_validator.filter("http://www.example.com/space here.html"));
    Assert.assertNull("Invalid url: /main.html", url_validator.filter("/main.html"));
    Assert.assertNull("Invalid url: www.example.com/main.html", url_validator.filter("www.example.com/main.html"));
    Assert.assertNull("Invalid url: ftp:www.example.com/main.html", url_validator.filter("ftp:www.example.com/main.html"));
    Assert.assertNull("Inalid url: http://999.000.456.32/nutch/trunk/README.txt",
        url_validator.filter("http://999.000.456.32/nutch/trunk/README.txt"));
    Assert.assertNull("Invalid url: http://www.example.com/ma|in\\toc.html", url_validator.filter(" http://www.example.com/ma|in\\toc.html"));

    Assert.assertNotNull("Valid url: https://issues.apache.org/jira/NUTCH-1127", url_validator.filter("https://issues.apache.org/jira/NUTCH-1127"));
    Assert.assertNotNull("Valid url: http://domain.tld/function.cgi?url=http://fonzi.com/&name=Fonzi&mood=happy&coat=leather",
        url_validator.filter("http://domain.tld/function.cgi?url=http://fonzi.com/&name=Fonzi&mood=happy&coat=leather"));
    Assert.assertNotNull("Valid url: http://validator.w3.org/feed/check.cgi?url=http%3A%2F%2Ffeeds.feedburner.com%2Fperishablepress",
        url_validator.filter("http://validator.w3.org/feed/check.cgi?url=http%3A%2F%2Ffeeds.feedburner.com%2Fperishablepress"));
    Assert.assertNotNull("Valid url: ftp://alfa.bravo.pi/foo/bar/plan.pdf", url_validator.filter("ftp://alfa.bravo.pi/mike/check/plan.pdf"));

  }
}
TOP

Related Classes of org.apache.nutch.urlfilter.validator.TestUrlValidator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.