Package com.canoo.webtest.extension.spider

Source Code of com.canoo.webtest.extension.spider.Spider

/*
* Copyright (c) 2005 Canoo Engineering. All Rights Reserved.
*/
package com.canoo.webtest.extension.spider;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;

import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;

import com.canoo.webtest.engine.Context;
import com.canoo.webtest.engine.StepFailedException;
import com.canoo.webtest.steps.StepUtil;
import com.gargoylesoftware.htmlunit.Page;
import com.gargoylesoftware.htmlunit.WebResponse;
import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
import com.gargoylesoftware.htmlunit.html.HtmlPage;

/**
* @author Denis N. Antonioli
*/
public class Spider {
  private static final Logger LOG = Logger.getLogger(Spider.class);
  public static final IVisitorStrategy ALWAYS_ACCEPT_VISITOR_STRATEGY = new AlwaysAcceptVisitorStrategy();
  public static final IReporter NO_OP_REPORTER = new NoOpReporter();
  public static final IValidator NO_OP_VALIDATOR = new NoOpValidator();

  private final Map fVisitedLinks = new HashMap();

  private IReporter fReporter;
  private IVisitorStrategy fVisitorStrategy;
  private IValidator fValidator;

  private String fFileName;
  private int fDepth;
  private boolean fFailOnError;
  private Context fContext;

  public void setFailOnError(boolean failOnError) {
    fFailOnError = failOnError;
  }

  public void setFileName(final String filename) {
    fFileName = filename;
  }

  public String getFileName() {
    return fFileName;
  }

  public void setDepth(final int depth) {
    fDepth = depth;
  }

  public void setReporter(final IReporter reporter) {
    fReporter = reporter;
  }

  public IReporter getReporter() {
    return fReporter;
  }

  public void setVisitorStrategy(final IVisitorStrategy visitorStrategy) {
    fVisitorStrategy = visitorStrategy;
  }

  public IVisitorStrategy getVisitorStrategy() {
    return fVisitorStrategy;
  }

  public void setValidator(final IValidator validator) {
    fValidator = validator;
  }

  public IValidator getValidator() {
    return fValidator;
  }

  Writer getWriter() throws IOException {
    final Writer writer;
    if (fFileName != null) {
      final File file = new File(fContext.getConfig().getWebTestResultDir(), fFileName);
      LOG.info("Writing in " + file);
      writer = new FileWriter(file);
    } else {
      LOG.info("Writing in standard output");
      writer = new OutputStreamWriter(System.out);
    }
    return writer;
  }

    void setContext(Context context) {
        fContext = context;
    }

    public void execute(final Context context) {
    validate();
    fVisitedLinks.clear();
    setContext(context);
        doExecute();
    }

    boolean doExecute() {
        Writer writer = null;
        boolean success = false;
        try {
            writer = getWriter();
            fReporter.setWriter(writer);
            fReporter.writeHeader();
            visit((HtmlPage) fContext.getCurrentResponse(), fDepth);
            fReporter.writeFooter();
            success = true;
        } catch (final Throwable e) {
            LOG.error("Problems during write: " + e.getMessage(), e);
        } finally {
            // doing this to Stdout will cause interuption
            IOUtils.closeQuietly(writer);
        }
        return success;
    }

    void validate() {
        if (fDepth < 0) {
            throw new IllegalArgumentException("depth must be >= 0");
        }
        if (fFileName == null) {
            LOG.info("No file name defined, will output to console");
        }
        if (fReporter == null) {
            LOG.info("No reporter defined, using noop reporter");
            fReporter = NO_OP_REPORTER;
        }
        if (fValidator == null) {
            LOG.info("No validator defined, using noop validator");
            fValidator = NO_OP_VALIDATOR;
        }
        if (fVisitorStrategy == null) {
            LOG.info("No visitor strategy set, using noop strategy");
            fVisitorStrategy = ALWAYS_ACCEPT_VISITOR_STRATEGY;
        }
    }

  void visit(final HtmlPage currentResponse, final int depth) throws IOException {
    LOG.debug("report depth " + depth);
    for (final Iterator iter = currentResponse.getAnchors().iterator(); iter.hasNext();) {
      final HtmlAnchor link = (HtmlAnchor) iter.next();

      final Properties linkInfo = fValidator.validate(fDepth - depth, currentResponse, link);
      fReporter.write(linkInfo);
      if (depth > 0 && needsReport(link)) {
                processLink(link, depth);
            }
    }
  }

    void processLink(final HtmlAnchor link, final int depth) throws IOException {
        try {
            follow(link);
            final Page page = fContext.getCurrentResponse();
            if (page instanceof HtmlPage)
            {
              visit((HtmlPage) page, depth - 1);
            }
            else
            {
              final WebResponse response = page.getWebResponse();
              LOG.info("Don't going deeper in response for " + response.getRequestUrl()
                  + " as it isn't an html page (content type: "
                  + response.getContentType() + ", page" + page + ")");
            }
        }
        catch (final StepFailedException e) {
            LOG.error(e.getMessage(), e);
            if (fFailOnError) {
                throw e;
            }
        }
    }

    void follow(final HtmlAnchor link) {
        LOG.debug("Clicking on link with href: " + link.getHrefAttribute());
        try
        {
          link.click();
        }
    catch (final Exception ex) {
      StepUtil.handleException(ex);
    }
    }

  boolean needsReport(final HtmlAnchor link) {
    if (fVisitedLinks.containsKey(link.getHrefAttribute())) {
      LOG.info(link.getHrefAttribute() + " skipped: already visited");
      return false;
    }
    if (!fVisitorStrategy.accept(link)) {
      LOG.info(link.getHrefAttribute() + " skipped: rejected by visitor");
      return false;
    }
    fVisitedLinks.put(link.getHrefAttribute(), Boolean.TRUE);
    return true;
  }

  private static class AlwaysAcceptVisitorStrategy implements IVisitorStrategy {
    public boolean accept(HtmlAnchor link) {
      return true;
    }
  }

  private static class NoOpReporter implements IReporter {
    public void writeHeader() {
    }

    public void write(Properties linkInfo) {
    }

    public void setWriter(Writer writer) {
    }

    public void writeFooter() {
    }
  }

  private static class NoOpValidator implements IValidator {
    private static final Properties EMPTY_PROPERTIES = new Properties();
    public Properties validate(final int depth, final HtmlPage webResponse, final HtmlAnchor link) {
      return EMPTY_PROPERTIES;
    }
  }
}
TOP

Related Classes of com.canoo.webtest.extension.spider.Spider

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.