Package org.archive.wayback.util.url

Source Code of org.archive.wayback.util.url.UrlOperations

package org.archive.wayback.util.url;

import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.httpclient.URIException;
import org.archive.net.UURI;
import org.archive.net.UURIFactory;

/**
* Class containing common static URL methods. Primarily resolveUrl() and
* the (currently) unused isAuthority().
*
* @author brad
* @version $Date$, $Revision$
*/
public class UrlOperations {
 
  private static final String CC_TLDS = "ac|ad|ae|af|ag|ai|al|am|an|ao|aq" +
      "|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs" +
      "|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cx" +
      "|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo" +
      "|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk" +
      "|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg" +
      "|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma" +
      "|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz" +
      "|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm" +
      "|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj" +
      "|sk|sl|sm|sn|so|sr|st|su|sv|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn" +
      "|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu" +
      "|wf|ws|ye|yt|yu|za|zm|zw";
 
  private static final String GEN_TLDS = "aero|biz|cat|com|coop|edu|gov" +
      "|info|int|jobs|mil|mobi|museum|name|net|org|pro|travel";
 
 
  private static final String ALL_TLD_PATTERN = CC_TLDS + "|" + GEN_TLDS;

  private static final String IP_PATTERN = "[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+";
 
    private static final Pattern AUTHORITY_REGEX =
        Pattern.compile("(([0-9a-z_.-]+)\\.(" + ALL_TLD_PATTERN + "))|" +
            "(" + IP_PATTERN + ")");

    /**
   * @param urlPart
   * @return boolean indicating whether urlPart might be an Authority.
   */
  public static boolean isAuthority(String urlPart) {
    Matcher m = AUTHORITY_REGEX.matcher(urlPart);
   
    return (m != null) && m.matches();
  }
 
  /**
   * @param baseUrl
   * @param url
   * @return url resolved against baseUrl, unless it is absolute already
   */
  public static String resolveUrl(String baseUrl, String url) {
    // TODO: this only works for http://
    if(url.startsWith("http://")) {
      return url;
    }
    UURI absBaseURI;
    UURI resolvedURI = null;
    try {
      absBaseURI = UURIFactory.getInstance(baseUrl);
      resolvedURI = UURIFactory.getInstance(absBaseURI, url);
    } catch (URIException e) {
      e.printStackTrace();
      return url;
    }
    return resolvedURI.getEscapedURI();
  }
}
TOP

Related Classes of org.archive.wayback.util.url.UrlOperations

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.