Package com.crawljax.core

Source Code of com.crawljax.core.InitialCrawler

package com.crawljax.core;

import java.util.ArrayList;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.crawljax.browser.EmbeddedBrowser;
import com.crawljax.core.plugin.CrawljaxPluginsUtil;
import com.crawljax.core.state.Eventable;
import com.crawljax.core.state.StateFlowGraph;
import com.crawljax.core.state.StateMachine;
import com.crawljax.core.state.StateVertex;

/**
* This is the initial Crawler. An initial crawler crawls only the index page, creates the index
* state and builds a session object and resumes the normal operations.
*
* @author Stefan Lenselink <S.R.Lenselink@student.tudelft.nl>
* @version $Id$
*/

public class InitialCrawler extends Crawler {

  private static final Logger LOGGER = LoggerFactory.getLogger(InitialCrawler.class);

  private final CrawljaxController controller;

  private EmbeddedBrowser browser; // should be final but try-catch prevents...

  private StateMachine stateMachine;

  /**
   * The default constructor.
   *
   * @param mother
   *            the controller to use.
   */
  public InitialCrawler(CrawljaxController mother) {
    super(mother, new ArrayList<Eventable>(), "initial");
    controller = mother;
  }

  @Override
  public EmbeddedBrowser getBrowser() {
    return browser;
  }

  @Override
  public StateMachine getStateMachine() {
    return stateMachine;
  }

  @Override
  public void run() {

    try {
      browser = controller.getBrowserPool().requestBrowser();
    } catch (InterruptedException e) {
      LOGGER.error("The request for a browser was interuped.");
    }

    goToInitialURL();

    /**
     * Build the index state
     */
    StateVertex indexState =
            new StateVertex(this.getBrowser().getCurrentUrl(), "index", this.getBrowser()
                    .getDom(), controller.getStrippedDom(this.getBrowser()));

    /**
     * Build the StateFlowGraph
     */
    StateFlowGraph stateFlowGraph = new StateFlowGraph(indexState);

    /**
     * Build the StateMachine
     */
    stateMachine =
            new StateMachine(stateFlowGraph, indexState, controller.getInvariantList());

    /**
     * Build the CrawlSession
     */
    CrawlSession session =
            new CrawlSession(controller.getBrowserPool(), stateFlowGraph, indexState,
                    controller.getStartCrawl(), controller.getConfigurationReader());
    controller.setSession(session);

    /**
     * Run OnNewState Plugins for the index state.
     */
    CrawljaxPluginsUtil.runOnNewStatePlugins(session);

    /**
     * The initial work is done, continue with the normal procedure!
     */
    super.run();

  }
TOP

Related Classes of com.crawljax.core.InitialCrawler

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.