Package org.archive.net

Examples of org.archive.net.LaxURI


     * @throws RobotsUnavailableException
     */
    public boolean isRobotPermitted(String url, String userAgent)
            throws IOException, RobotsUnavailableException {
        RobotRules rules = getRulesForUrl(url, userAgent);
        return !rules.blocksPathForUA(new LaxURI(url, false).getPath(),
                userAgent);
    }
View Full Code Here


     */
    // Abstract: no implementation is provided here; a concrete subclass must
    // supply the RobotRules lookup for the given url / userAgent pair.
    public abstract RobotRules getRulesForUrl(String url, String userAgent)
            throws IOException, RobotsUnavailableException;

    public static String robotsUrlForUrl(String url) throws URIException {
        LaxURI uri = new LaxURI(url, false);
        uri.setPath("/robots.txt");
        uri.setQuery(null);
        uri.setFragment(null);
        return uri.toString();
    }
View Full Code Here

    throws LiveDocumentNotAvailableException, URIException, IOException {
   
    HttpClient http = getHttpClient();
    OutputStream os = new FileOutputStream(file);
    ExtendedGetMethod method = new ExtendedGetMethod(os);
    LaxURI lURI = new LaxURI(urlString,true);
    method.setURI(lURI);
    try {
      int code = http.executeMethod(method);
      os.close();
      // TODO: Constant 200
View Full Code Here

   * @return true if the url-timestamp should not be shown to end users
   */
  public boolean isExcluded(String urlString, String timestamp) {
    SearchResult sr = new SearchResult();

    LaxURI url = null;
    String host = null;
    try {
      url = new LaxURI(urlString,true);
      host = url.getHost();
    } catch (URIException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
      return true;
    }
View Full Code Here

TOP

Related Classes of org.archive.net.LaxURI

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.