Package org.vietspider.net.client

Source Code of org.vietspider.net.client.WebClient

/***************************************************************************
* Copyright 2001-2007 The VietSpider         All rights reserved.       *
**************************************************************************/
package org.vietspider.net.client;

import static org.vietspider.net.client.HttpClientFactory.ACCEPT_CHARSET_NAME;
import static org.vietspider.net.client.HttpClientFactory.ACCEPT_CHARSET_VALUE;
import static org.vietspider.net.client.HttpClientFactory.ACCEPT_ENCODING_NAME;
import static org.vietspider.net.client.HttpClientFactory.ACCEPT_ENCODING_VALUE;
import static org.vietspider.net.client.HttpClientFactory.ACCEPT_LANGUAGE_NAME;
import static org.vietspider.net.client.HttpClientFactory.ACCEPT_LANGUAGE_VALUE;
import static org.vietspider.net.client.HttpClientFactory.ACCEPT_NAME;
import static org.vietspider.net.client.HttpClientFactory.ACCEPT_VALUE;
import static org.vietspider.net.client.HttpClientFactory.CACHE_CONTROL_NAME;
import static org.vietspider.net.client.HttpClientFactory.CACHE_CONTROL_VALUE;
import static org.vietspider.net.client.HttpClientFactory.CONNECTION_NAME;
import static org.vietspider.net.client.HttpClientFactory.CONNECTION_VALUE;
import static org.vietspider.net.client.HttpClientFactory.CONTENT_TYPE_NAME;
import static org.vietspider.net.client.HttpClientFactory.CONTENT_TYPE_VALUE_FORM;
import static org.vietspider.net.client.HttpClientFactory.KEEP_ALIVE_NAME;
import static org.vietspider.net.client.HttpClientFactory.KEEP_ALIVE_VALUE;
import static org.vietspider.net.client.HttpClientFactory.REFERER_NAME;
import static org.vietspider.net.client.HttpClientFactory.USER_AGENT_NAME;
import static org.vietspider.net.client.HttpClientFactory.USER_AGENT_VALUE;

import java.net.URI;
import java.net.URL;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.http.HttpHost;
import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.HttpClient;
import org.apache.http.client.RedirectHandler;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.conn.params.ConnRoutePNames;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.protocol.HTTP;
import org.vietspider.common.io.LogService;
/**
* Author : Nhu Dinh Thuan
*          nhudinhthuan@yahoo.com
* Oct 23, 2007 
*/
public class WebClient {

  private final static short NORMAL = 1;
  private final static short SLOW = 0;
  private final static short VERY_SLOW = -1;
  private final static short INVISIBLE = -2;

  private volatile boolean log = false;
  private volatile short type_execute = NORMAL;

  protected volatile String host;
  private volatile String userAgent;

  protected volatile DefaultHttpClient httpClient;
  protected volatile RedirectHandler redirectHandler;

  protected volatile ConcurrentHashMap<String, char[]> cacheData;

  private volatile int badRequest = 0;
  //  private volatile Proxies httpProxies;

  public WebClient() {
    httpClient = HttpClientFactory.createDefaultHttpClient();
    cacheData = new ConcurrentHashMap<String, char[]>();
    /*if(Application.SERVER_PROPERTIES != null){
      setProxy(Application.SERVER_PROPERTIES);
    } else if(Application.CLIENT_PROPERTIES != null){
      setProxy(Application.CLIENT_PROPERTIES);
    }*/
  }

  public void registryProxy(String proxyHost) {
    registryProxy(ProxiesMonitor.createProxy(proxyHost));
  }

  public void registryProxy(String proxyHost, int proxyPort, String username, String password) {
    if(username != null) {
      registryISAProxy(proxyHost, proxyPort, username, password);
      return;
    }
    //    System.out.println("==> proxy "+host+ " : "+ port);
    registryProxy(new HttpHost(proxyHost, proxyPort, "http"));
  }

  public void registryISAProxy(String proxyHost, int proxyPort, String username, String password) {
//    System.out.println(" dang ky isa proxy "+ proxyHost + " : "+ proxyPort+ " / "+ username+ " : "+ password);
    httpClient.getCredentialsProvider().setCredentials(
        new AuthScope(proxyHost, proxyPort), new UsernamePasswordCredentials(username, password));

    registryProxy(new HttpHost(proxyHost, proxyPort, "http"));

    //    httpClient.getCredentialsProvider().setCredentials(
    //        new AuthScope(proxyHost, proxyPort),
    //        new NTCredentials(username, password, proxyHost, proxyHost));
  }

  public void registryProxy(HttpHost httpProxy) {
    if(httpProxy == null) {
      httpClient.getParams().removeParameter(ConnRoutePNames.DEFAULT_PROXY);
      return;
    }
    httpClient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY, httpProxy);
  }

  public void setURL(String referer, URL url, String blind) throws Exception {
    this.setURL(url);
    ProxiesMonitor.getInstance().put(host, blind);
    if(referer == null || referer.trim().isEmpty()) return;
    execute(createHttpHost(referer), createGetMethod(referer, ""));
  }

  public void setURL(String referer, URL url) throws Exception {
    this.setURL(url);
    if(referer == null || referer.trim().isEmpty()) return;
    execute(createHttpHost(referer), createGetMethod(referer, ""));
  }

  private final void setURL(URL url) throws Exception {
    httpClient = HttpClientFactory.createDefaultHttpClient();
    if(redirectHandler != null) {
      httpClient.setRedirectHandler(redirectHandler);
    }

//    httpClient.getParams().removeParameter(ConnRoutePNames.DEFAULT_PROXY);

    type_execute = NORMAL;
    this.host = url.getHost();
    this.cacheData.clear();
    this.badRequest = 0;
  }

  public void shutdown() {
    httpClient.clearRequestInterceptors();
    httpClient.clearResponseInterceptors();
    httpClient.getConnectionManager().shutdown();
  }

  public HttpClient getHttpClient() { return httpClient; }

  public void clearCookies() throws Exception {
    httpClient.getCookieStore().clear();
  }

  public HttpResponse execute(HttpHost httpHost, HttpRequest httpRequest) throws Exception {
    //    if(httpHost.getHostName().indexOf("hochiminh") > -1) {
    //      List<Cookie> cookies = httpClient.getCookieStore().getCookies();
    //      for(int i = 0; i < cookies.size(); i++) {
    //        System.out.println(cookies.get(i).getName() + " : "+ cookies.get(i).getValue());
    //      }

    //      org.apache.http.Header [] headers = httpRequest.getAllHeaders();
    //      StringBuilder builder = new StringBuilder("================Header Value=====================\n");
    //      for(org.apache.http.Header header : headers) {
    //        builder.append(header.getName()).append(':').append(header.getValue()).append('\n');
    //      }
    //      builder.append("==========================================").append('\n');
    //      LogService.getInstance().setMessage("WEB", null, builder.toString());
    //    }
   
//    HttpHost httpHost2 = (HttpHost)httpClient.getParams().getParameter(ConnRoutePNames.DEFAULT_PROXY);
//    System.out.println("proxy la  " + httpClient.getParams().getParameter(ConnRoutePNames.DEFAULT_PROXY));
//    System.out.println(httpClient.getCredentialsProvider().getCredentials(
//        new AuthScope(httpHost2.getHostName(), httpHost2.getPort())));

    //    System.out.println("thay "+ httpRequest.getRequestLine().getUri()+ "  : "+slow);
    if(type_execute == SLOW) {
      try {
        Thread.sleep(7*1000);
      } catch (Exception e) {
      }
    } else if(type_execute == SLOW) {
      try {
        Thread.sleep(15*1000);
      } catch (Exception e) {
      }
    } else if(type_execute == INVISIBLE) {
      clearCookies();
    }

    return httpClient.execute(httpHost, httpRequest);
    /*{
      HttpResponse httpResponse  = httpClient.execute(httpHost, httpRequest);
      org.apache.http.Header [] headers = httpResponse.getAllHeaders();
      StringBuilder builder = new StringBuilder("================Header Value=====================\n");
      for(org.apache.http.Header header : headers) {
        builder.append(header.getName()).append(':').append(header.getValue()).append('\n');
      }
      builder.append("==========================================").append('\n');
      LogService.getInstance().setMessage("WEB", null, builder.toString());

      return httpResponse;
    }*/
  }

  public void cacheResponse(String address, char[] value) {
    if(cacheData.size() > 10) cacheData.clear();
    cacheData.put(address, value);
  }

  public ConcurrentHashMap<String, char[]> getCacheData() { return cacheData; }

  public String getHost() { return host; }

  public void setRedirectHandler(RedirectHandler redirectHandler) {
    this.redirectHandler = redirectHandler;
  }

  public String getUserAgent() { return userAgent; }
  public void setUserAgent(String agent) {
    this.userAgent = null;
    //    System.out.println("  chuan bi put user agent "+ userAgent_+ " : " + "slow".equalsIgnoreCase(userAgent_));
    if("slow".equals(agent)) {
      type_execute = SLOW;
      return;
    }

    if("very slow".equals(agent)) {
      type_execute = VERY_SLOW;
      return;
    }

    if("invisible".equals(agent)) {
      type_execute = INVISIBLE;
      return;
    }

    type_execute = NORMAL;
    this.userAgent = agent;
  }

  public HttpGet createGetMethod(String link, String referer) throws Exception {   
    HttpGet httpGet = new HttpGet(link);
    setHeaderValue(httpGet, referer);
    return httpGet;
  }

  public HttpHost createHttpHost(String address) {
    URL url  = null;
    boolean isSecure = address.toLowerCase().startsWith("https://");
    try {
      url  = new URL(address);
    } catch (Exception e) {
      return isSecure ? new HttpHost(host, 80, "https") : new HttpHost(host, 80);
    }
    int port = url.getPort();
    int dPort = url.getDefaultPort();
    if(port == -1 || port == 80) {
      return isSecure ? new HttpHost(url.getHost(), dPort, "https") : new HttpHost(url.getHost(), dPort);
    }
    return isSecure ? new HttpHost(url.getHost(), port, "https") new HttpHost(url.getHost(), port);
  }

  public HttpPost createFormPostMethod(String link,
      String referer, List<NameValuePair> nvpList) throws Exception {  
    HttpPost httpPost = new HttpPost(link);
    setHeaderValue(httpPost, referer);
    httpPost.addHeader(CONTENT_TYPE_NAME, CONTENT_TYPE_VALUE_FORM);

    if(log) {
      //for test
      UrlEncodedFormEntity entity = new UrlEncodedFormEntity(nvpList, HTTP.UTF_8);
      httpPost.setEntity(entity);
      org.vietspider.common.io.DataReader reader = new org.vietspider.common.io.DataReader();
      byte [] bytes = reader.loadInputStream(entity.getContent()).toByteArray();
      StringBuilder builder = new StringBuilder("=====================Post Value=====================\n");
      builder.append(new String(bytes, "utf-8")).append('\n');
      builder.append("==========================================").append('\n');
      LogService.getInstance().setMessage("WEB", null, builder.toString());
      //end test
    } else {
      httpPost.setEntity(new UrlEncodedFormEntity(nvpList, HTTP.UTF_8));
    }

    return httpPost;
  }

  public HttpPost createPostMethod(String link,
      String referer, List<NameValuePair> nvpList) throws Exception {   
    HttpPost httpPost = new HttpPost(link);
    setHeaderValue(httpPost, referer);

    if(nvpList == null) return httpPost;

    if(log) {
      //for test
      UrlEncodedFormEntity entity = new UrlEncodedFormEntity(nvpList, HTTP.UTF_8);
      httpPost.setEntity(entity);
      org.vietspider.common.io.DataReader reader = new org.vietspider.common.io.DataReader();
      byte [] bytes = reader.loadInputStream(entity.getContent()).toByteArray();
      StringBuilder builder = new StringBuilder("=====================Post Value=====================\n");
      builder.append(new String(bytes, "utf-8")).append('\n');
      builder.append("==========================================").append('\n');
      LogService.getInstance().setMessage("WEB", null, builder.toString());
      //end test
    } else {
      httpPost.setEntity(new UrlEncodedFormEntity(nvpList, HTTP.UTF_8));
    }

    return httpPost;
  }

  public void setHeaderValue(HttpUriRequest httpRequest, String referer) {
    String requestHost  = httpRequest.getURI().getHost();
    if(requestHost == null) {
      try {
        requestHost  = new URI(httpRequest.getRequestLine().getUri()).getHost();
      } catch (Exception e) {
      }
    }
    if(requestHost == null) requestHost = host;
    httpRequest.addHeader(HttpClientFactory.HOST_NAME, requestHost);
    if(userAgent != null) {
      httpRequest.addHeader(USER_AGENT_NAME, userAgent);
    } else {
      httpRequest.addHeader(USER_AGENT_NAME, USER_AGENT_VALUE);
    }
    httpRequest.addHeader(CONNECTION_NAME, CONNECTION_VALUE);
    httpRequest.addHeader(ACCEPT_ENCODING_NAME, ACCEPT_ENCODING_VALUE);
    httpRequest.addHeader(ACCEPT_CHARSET_NAME, ACCEPT_CHARSET_VALUE);
    httpRequest.addHeader(KEEP_ALIVE_NAME, KEEP_ALIVE_VALUE);
    httpRequest.addHeader(ACCEPT_NAME, ACCEPT_VALUE);
    httpRequest.addHeader(ACCEPT_LANGUAGE_NAME, ACCEPT_LANGUAGE_VALUE);
    httpRequest.addHeader(CACHE_CONTROL_NAME, CACHE_CONTROL_VALUE);
    if(referer != null) httpRequest.addHeader(REFERER_NAME, referer);
  }

  public boolean isLog() { return log; }

  public void setLog(boolean log) { this.log = log; }

  public RedirectHandler getRedirectHandler() { return redirectHandler; }

  public synchronized void increaseBadRequest() { badRequest++; }
  public boolean isBadClient() {
    //    System.out.println(" bad proxy "+ hashCode()+ " / "+ badRequest + "/5");
    if(badRequest < 5) return false;
    addBadProxy();
    return true;
  }

  public void resetBadRequestCounter() {  badRequest = 0; }
 
  public void addBadProxy() {
    Proxies proxies = ProxiesMonitor.getInstance().getProxies(host);
    if(proxies == null) return;
    HttpHost httpProxy = (HttpHost)httpClient.getParams().getParameter(ConnRoutePNames.DEFAULT_PROXY);
    if(httpProxy == null) return;
    proxies.addBadProxy(httpProxy.getHostName() + ":" +String.valueOf(httpProxy.getPort()));
  }

  public Proxies getProxies() {
    return ProxiesMonitor.getInstance().getProxies(host);
  }

}
TOP

Related Classes of org.vietspider.net.client.WebClient

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.