Package org.commoncrawl.util.shared

Examples of org.commoncrawl.util.shared.GoogleURL$GoogleURLComponent


   */

  public static String canonicalizeURL(String incomingURL,
      boolean stripLeadingWWW) throws MalformedURLException {

    GoogleURL urlObject = new GoogleURL(incomingURL);

    if (!urlObject.isValid()) {
      throw new MalformedURLException("URL:" + incomingURL + " is invalid");
    }

    return canonicalizeURL(urlObject, stripLeadingWWW);
  }
View Full Code Here


        if (colonEnd != -1) {
          hostLength = colonEnd;
          host = urlString.substring(hostStart, hostStart + hostLength);
        }

        GoogleURL urlObject = new GoogleURL("http://" + host);

        if (urlObject.isValid()) {
          return urlObject.getHost();
        }
      }
    }
    return null;
  }
View Full Code Here

TOP

Related Classes of org.commoncrawl.util.shared.GoogleURL$GoogleURLComponent

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.