Package org.apache.nutch.protocol.httpclient

Source Code of org.apache.nutch.protocol.httpclient.HttpResponse

/* Copyright (c) 2004 The Nutch Organization.  All rights reserved.   */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */

package org.apache.nutch.protocol.httpclient;

import org.apache.nutch.protocol.Content;
import org.apache.nutch.util.NutchConf;
import org.apache.nutch.util.mime.MimeType;
import org.apache.nutch.util.mime.MimeTypes;

import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpVersion;

import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;

/**
* An HTTP response.
*/
public class HttpResponse {
  /** A flag that tells if magic resolution must be performed */
  private final static boolean MAGIC =
        NutchConf.get().getBoolean("mime.type.magic", true);

  /** Get the MimeTypes resolver instance. */
  private final static MimeTypes MIME =
        MimeTypes.get(NutchConf.get().get("mime.types.file"));

  private String orig;

  private String base;

  private byte[] content;

  private static final byte[] EMPTY_CONTENT = new byte[0];

  private int code;

  private MultiProperties headers = new MultiProperties();

  /**
   * Returns the response code.
   */
  public int getCode() {
    return code;
  }

  /**
   * Returns the value of a named header.
   */
  public String getHeader(String name) {
    return (String) headers.get(name);
  }

  public byte[] getContent() {
    return content;
  }

  public Content toContent() {
    String contentType = getHeader("Content-Type");
    if (contentType == null) {
      MimeType type = null;
      if (MAGIC) {
        type = MIME.getMimeType(orig, content);
      } else {
        type = MIME.getMimeType(orig);
      }
      if (type != null) {
          contentType = type.getName();
      } else {
          contentType = "";
      }
    }
    if (content == null) content = EMPTY_CONTENT;
    return new Content(orig, base, content, contentType, headers);
  }

  public HttpResponse(URL url) throws IOException {
    this.base = url.toString();
    this.orig = url.toString();
    GetMethod get = new GetMethod(this.orig);
    get.setFollowRedirects(false);
    get.setRequestHeader("User-Agent", Http.AGENT_STRING);
    HttpMethodParams params = get.getParams();
    // some servers cannot digest the new protocol
    params.setVersion(HttpVersion.HTTP_1_0);
    params.makeLenient();
    params.setContentCharset("UTF-8");
    params.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
    params.setBooleanParameter(HttpMethodParams.SINGLE_COOKIE_HEADER, true);
    // XXX (ab) not sure about this... the default is to retry 3 times; if
    // XXX the request body was sent the method is not retried, so there is
    // XXX little danger in retrying...
    // params.setParameter(HttpMethodParams.RETRY_HANDLER, null);
    try {
      code = Http.getClient().executeMethod(get);

      Header[] heads = get.getResponseHeaders();

      for (int i = 0; i < heads.length; i++) {
        headers.put(heads[i].getName(), heads[i].getValue());
      }
      // always read content. Sometimes content is useful to find a cause
      // for error.
      try {
        InputStream in = get.getResponseBodyAsStream();
        byte[] buffer = new byte[Http.BUFFER_SIZE];
        int bufferFilled = 0;
        int totalRead = 0;
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        int tryAndRead = calculateTryToRead(totalRead);
        while ((bufferFilled = in.read(buffer, 0, buffer.length)) != -1 && tryAndRead > 0) {
          totalRead += bufferFilled;
          out.write(buffer, 0, bufferFilled);
          tryAndRead = calculateTryToRead(totalRead);
        }

        content = out.toByteArray();
        in.close();
      } catch (Exception e) {
        if (code == 200) throw new IOException(e.toString());
        // for codes other than 200 OK, we are fine with empty content
      }
    } catch (org.apache.commons.httpclient.ProtocolException pe) {
      pe.printStackTrace();
      get.releaseConnection();
      throw new IOException(pe.toString());
    } finally {
      get.releaseConnection();
    }
  }

  private int calculateTryToRead(int totalRead) {
    int tryToRead = Http.BUFFER_SIZE;
    if (Http.MAX_CONTENT <= 0) {
      return Http.BUFFER_SIZE;
    } else if (Http.MAX_CONTENT - totalRead < Http.BUFFER_SIZE) {
      tryToRead = Http.MAX_CONTENT - totalRead;
    }
    return tryToRead;
  }

}
TOP

Related Classes of org.apache.nutch.protocol.httpclient.HttpResponse

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.