Package com.jeecms.cms.service

Source Code of com.jeecms.cms.service.AcquisitionSvcImpl$CharsetHandler

package com.jeecms.cms.service;

import java.io.IOException;
import java.net.URI;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.StatusLine;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.HttpResponseException;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import com.jeecms.cms.entity.assist.CmsAcquisition;
import com.jeecms.cms.entity.assist.CmsAcquisitionHistory;
import com.jeecms.cms.entity.assist.CmsAcquisitionTemp;
import com.jeecms.cms.entity.assist.CmsAcquisition.AcquisitionResultType;
import com.jeecms.cms.entity.main.CmsSite;
import com.jeecms.cms.entity.main.Content;
import com.jeecms.cms.manager.assist.CmsAcquisitionHistoryMng;
import com.jeecms.cms.manager.assist.CmsAcquisitionMng;
import com.jeecms.cms.manager.assist.CmsAcquisitionTempMng;

@Service
public class AcquisitionSvcImpl implements AcquisitionSvc {
  private Logger log = LoggerFactory.getLogger(AcquisitionSvcImpl.class);

  public boolean start(Integer id) {
    CmsAcquisition acqu = cmsAcquisitionMng.findById(id);
    if (acqu == null || acqu.getStatus() == CmsAcquisition.START) {
      return false;
    }
    Thread thread = new AcquisitionThread(acqu);
    thread.start();
    return true;
  }
 
  private void end(CmsAcquisition acqu){
    Integer siteId = acqu.getSite().getId();
    cmsAcquisitionMng.end(acqu.getId());
    CmsAcquisition acquisition = cmsAcquisitionMng.popAcquFromQueue(siteId);
    if (acquisition != null) {
      Integer id = acquisition.getId();
      start(id);
    }
  }

  private CmsAcquisitionMng cmsAcquisitionMng;
  private CmsAcquisitionHistoryMng cmsAcquisitionHistoryMng;
  private CmsAcquisitionTempMng cmsAcquisitionTempMng;

  @Autowired
  public void setCmsAcquisitionMng(CmsAcquisitionMng cmsAcquisitionMng) {
    this.cmsAcquisitionMng = cmsAcquisitionMng;
  }

  @Autowired
  public void setCmsAcquisitionHistoryMng(
      CmsAcquisitionHistoryMng cmsAcquisitionHistoryMng) {
    this.cmsAcquisitionHistoryMng = cmsAcquisitionHistoryMng;
  }

  @Autowired
  public void setCmsAcquisitionTempMng(
      CmsAcquisitionTempMng cmsAcquisitionTempMng) {
    this.cmsAcquisitionTempMng = cmsAcquisitionTempMng;
  }

  private class AcquisitionThread extends Thread {
    private CmsAcquisition acqu;

    public AcquisitionThread(CmsAcquisition acqu) {
      super(acqu.getClass().getName() + "#" + acqu.getId());
      this.acqu = acqu;
    }

    @Override
    public void run() {
      if (acqu == null) {
        return;
      }
      acqu = cmsAcquisitionMng.start(acqu.getId());
      String[] plans = acqu.getAllPlans();
      HttpClient client = new DefaultHttpClient();
      CharsetHandler handler = new CharsetHandler(acqu.getPageEncoding());
      List<String> contentList;
      String url;
      int currNum = acqu.getCurrNum();
      int currItem = acqu.getCurrItem();
      Integer acquId = acqu.getId();
      for (int i = plans.length - currNum; i >= 0; i--) {
        url = plans[i];
        contentList = getContentList(client, handler, url, acqu
            .getLinksetStart(), acqu.getLinksetEnd(), acqu
            .getLinkStart(), acqu.getLinkEnd());
        String link;
        for (int j = contentList.size() - currItem; j >= 0; j--) {
          if (cmsAcquisitionMng.isNeedBreak(acqu.getId(),
              plans.length - i, contentList.size() - j,
              contentList.size())) {
            client.getConnectionManager().shutdown();
            log.info("Acquisition#{} breaked", acqu.getId());
            return;
          }
          if (acqu.getPauseTime() > 0) {
            try {
              Thread.sleep(acqu.getPauseTime());
            } catch (InterruptedException e) {
              log.warn(null, e);
            }
          }
          link = contentList.get(j);
          float curr = contentList.size() - j;
          float total = contentList.size();
          CmsAcquisitionTemp temp = AcquisitionSvcImpl.this.newTemp(
              url, link, contentList.size() - j, curr, total,
              acqu.getSite());
          CmsAcquisitionHistory history = AcquisitionSvcImpl.this
              .newHistory(url, link, acqu);
          saveContent(client, handler, acquId, link, acqu
              .getTitleStart(), acqu.getTitleEnd(), acqu
              .getContentStart(), acqu.getContentEnd(), temp,
              history);
        }
        currItem = 1;
      }
      client.getConnectionManager().shutdown();
      AcquisitionSvcImpl.this.end(acqu);
      log.info("Acquisition#{} complete", acqu.getId());
    }

    private List<String> getContentList(HttpClient client,
        CharsetHandler handler, String url, String linksetStart,
        String linksetEnd, String linkStart, String linkEnd) {
      List<String> list = new ArrayList<String>();
      try {
        HttpGet httpget = new HttpGet(new URI(url));
        String base = url.substring(0, url.indexOf("/", url
            .indexOf("//") + 2));
        String html = client.execute(httpget, handler);
        int start = html.indexOf(linksetStart);
        if (start == -1) {
          return list;
        }
        start += linksetStart.length();
        int end = html.indexOf(linksetEnd, start);
        if (end == -1) {
          return list;
        }
        String s = html.substring(start, end);
        start = 0;
        String link;
        log.info(s);
        while ((start = s.indexOf(linkStart, start)) != -1) {
          start += linkStart.length();
          end = s.indexOf(linkEnd, start);
          if (end == -1) {
            return list;
          } else {
            link = s.substring(start, end);
            if (!link.startsWith("http")) {
              link = base + link;
            }
            log.debug("content link: {}", link);
            list.add(link);
            start = end + linkEnd.length();
          }
        }
      } catch (Exception e) {
        log.warn(null, e);
      }
      return list;
    }

    private Content saveContent(HttpClient client, CharsetHandler handler,
        Integer acquId, String url, String titleStart, String titleEnd,
        String contentStart, String contentEnd,
        CmsAcquisitionTemp temp, CmsAcquisitionHistory history) {
      CmsAcquisition acqu = cmsAcquisitionMng.findById(acquId);
      history.setAcquisition(acqu);
      try {
        int start, end;
        HttpGet httpget = new HttpGet(new URI(url));
        String html = client.execute(httpget, handler);
        start = html.indexOf(titleStart);
        if (start == -1) {
          return handerResult(temp, history, null,
              AcquisitionResultType.TITLESTARTNOTFOUND);
        }
        start += titleStart.length();
        end = html.indexOf(titleEnd, start);
        if (end == -1) {
          return handerResult(temp, history, null,
              AcquisitionResultType.TITLEENDNOTFOUND);
        }
        String title = html.substring(start, end);
        if (cmsAcquisitionHistoryMng
            .checkExistByProperties(true, title)) {
          return handerResult(temp, history, title,
              AcquisitionResultType.TITLEEXIST, true);
        }
        start = html.indexOf(contentStart);
        if (start == -1) {
          return handerResult(temp, history, title,
              AcquisitionResultType.CONTENTSTARTNOTFOUND);
        }
        start += contentStart.length();
        end = html.indexOf(contentEnd, start);
        if (end == -1) {
          return handerResult(temp, history, title,
              AcquisitionResultType.CONTENTENDNOTFOUND);
        }
        String txt = html.substring(start, end);
        Content content = cmsAcquisitionMng.saveContent(title, txt,
            acquId, AcquisitionResultType.SUCCESS, temp, history);
        cmsAcquisitionTempMng.save(temp);
        cmsAcquisitionHistoryMng.save(history);
        return content;
      } catch (Exception e) {
        log.warn(null, e);
        return handerResult(temp, history, null,
            AcquisitionResultType.UNKNOW);
      }
    }

    private Content handerResult(CmsAcquisitionTemp temp,
        CmsAcquisitionHistory history, String title,
        AcquisitionResultType errorType) {
      return handerResult(temp, history, title, errorType, false);
    }

    private Content handerResult(CmsAcquisitionTemp temp,
        CmsAcquisitionHistory history, String title,
        AcquisitionResultType errorType, Boolean repeat) {
      temp.setDescription(errorType.name());
      temp.setTitle(title);
      cmsAcquisitionTempMng.save(temp);
      if (!repeat) {
        history.setTitle(title);
        history.setDescription(errorType.name());
        cmsAcquisitionHistoryMng.save(history);
      }
      return null;
    }
  }

  private CmsAcquisitionTemp newTemp(String channelUrl, String contentUrl,
      Integer id, Float curr, Float total, CmsSite site) {
    CmsAcquisitionTemp temp = new CmsAcquisitionTemp();
    temp.setChannelUrl(channelUrl);
    temp.setContentUrl(contentUrl);
    temp.setSeq(id);
    NumberFormat nf = NumberFormat.getPercentInstance();
    String percent = nf.format(curr / total);
    temp.setPercent(Integer.parseInt(percent.substring(0,
        percent.length() - 1)));
    temp.setSite(site);
    return temp;
  }

  private CmsAcquisitionHistory newHistory(String channelUrl,
      String contentUrl, CmsAcquisition acqu) {
    CmsAcquisitionHistory history = new CmsAcquisitionHistory();
    history.setChannelUrl(channelUrl);
    history.setContentUrl(contentUrl);
    history.setAcquisition(acqu);
    return history;
  }

  private class CharsetHandler implements ResponseHandler<String> {
    private String charset;

    public CharsetHandler(String charset) {
      this.charset = charset;
    }

    public String handleResponse(HttpResponse response)
        throws ClientProtocolException, IOException {
      StatusLine statusLine = response.getStatusLine();
      if (statusLine.getStatusCode() >= 300) {
        throw new HttpResponseException(statusLine.getStatusCode(),
            statusLine.getReasonPhrase());
      }
      HttpEntity entity = response.getEntity();
      if (entity != null) {
        if (!StringUtils.isBlank(charset)) {
          return EntityUtils.toString(entity, charset);
        } else {
          return EntityUtils.toString(entity);
        }
      } else {
        return null;
      }
    }
  }
}
TOP

Related Classes of com.jeecms.cms.service.AcquisitionSvcImpl$CharsetHandler

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.