Package com.crawljax.core.state

Examples of com.crawljax.core.state.StateVertex


      context.getSession().addCrawlPath(crawlpath.immutableCopy());
    }
  }

  private void parseCurrentPageForCandidateElements() {
    StateVertex currentState = stateMachine.getCurrentState();
    LOG.debug("Parsing DOM of state {} for candidate elements", currentState.getName());
    ImmutableList<CandidateElement> extract = candidateExtractor.extract(currentState);
   
    plugins.runPreStateCrawlingPlugins(context, extract, currentState);
    candidateActionCache.addActions(extract, currentState);
  }
View Full Code Here


  private void goBackOneState() {
    LOG.debug("Going back one state");
    CrawlPath currentPath = crawlpath.immutableCopy();
    crawlpath = null;
    StateVertex current = stateMachine.getCurrentState();
    reset();
    follow(currentPath, current);
  }
View Full Code Here

   */
  public StateVertex crawlIndex() {
    LOG.debug("Setting up vertex of the index page");
    browser.goToUrl(url);
    plugins.runOnUrlLoadPlugins(context);
    StateVertex index =
            StateMachine.createIndex(url.toExternalForm(), browser.getStrippedDom(),
                    stateComparator.getStrippedDom(browser));
    Preconditions.checkArgument(index.getId() == StateVertex.INDEX_ID,
            "It seems some the index state is crawled more than once.");

    LOG.debug("Parsing the index for candidate elements");
    ImmutableList<CandidateElement> extract = candidateExtractor.extract(index);

View Full Code Here

  }

  private void follow(CrawlPath path, StateVertex targetState)
          throws StateUnreachableException,
          CrawljaxException {
    StateVertex curState = context.getSession().getInitialState();

    for (Eventable clickable : path) {

      checkCrawlConditions(targetState);

      LOG.debug("Backtracking by executing {} on element: {}", clickable.getEventType(),
              clickable);

      boolean switched = stateMachine.changeState(clickable.getTargetStateVertex());
      if (!switched) {
        throw new StateUnreachableException(targetState, "Could not switch states");
      }
      curState = clickable.getTargetStateVertex();
      crawlpath.add(clickable);
      handleInputElements(clickable);
      if (fireEvent(clickable)) {
        if (crawlerLeftDomain()) {
          throw new StateUnreachableException(targetState,
                  "Domain left while following path");
        }
        int depth = crawlDepth.incrementAndGet();
        LOG.info("Crawl depth is now {}", depth);
        plugins.runOnRevisitStatePlugins(context, curState);

      } else {
        throw new StateUnreachableException(targetState, "couldn't fire eventable "
                + clickable);
      }

      checkCrawlConditions(targetState);
    }

    if (!curState.equals(targetState)) {
      throw new StateUnreachableException(targetState,
              "The path didn't result in the desired state but in state "
                      + curState.getName());
    }
  }
View Full Code Here

  private void inspectNewState(Eventable event) {
    if (crawlerLeftDomain()) {
      LOG.debug("The browser left the domain. Going back one state...");
      goBackOneState();
    } else {
      StateVertex newState = stateMachine.newStateFor(browser);
      if (domChanged(event, newState)) {
        inspectNewDom(event, newState);
      } else {
        LOG.debug("Dom unchanged");
      }
View Full Code Here

      context.getSession().addCrawlPath(crawlpath.immutableCopy());
    }
  }

  private void parseCurrentPageForCandidateElements() {
    StateVertex currentState = stateMachine.getCurrentState();
    LOG.debug("Parsing DOM of state {} for candidate elements", currentState.getName());
    ImmutableList<CandidateElement> extract = candidateExtractor.extract(currentState);

    plugins.runPreStateCrawlingPlugins(context, extract, currentState);

    candidateActionCache.addActions(extract, currentState);
View Full Code Here

  private void goBackOneState() {
    LOG.debug("Going back one state");
    CrawlPath currentPath = crawlpath.immutableCopy();
    crawlpath = null;
    StateVertex current = stateMachine.getCurrentState();
    reset();
    follow(currentPath, current);
  }
View Full Code Here

   */
  public StateVertex crawlIndex() {
    LOG.debug("Setting up vertex of the index page");
    browser.goToUrl(url);
    plugins.runOnUrlLoadPlugins(context);
    StateVertex index = StateMachine.createIndex(url.toExternalForm(), browser.getDom(),
            stateComparator.getStrippedDom(browser));
    Preconditions.checkArgument(index.getId() == StateVertex.INDEX_ID,
            "It seems some the index state is crawled more than once.");

    LOG.debug("Parsing the index for candidate elements");
    ImmutableList<CandidateElement> extract = candidateExtractor.extract(index);

View Full Code Here

  @Override
  public CrawlSession call() {
    setMaximumCrawlTimeIfNeeded();
    plugins.runPreCrawlingPlugins(config);
    CrawlTaskConsumer firstConsumer = consumerFactory.get();
    StateVertex firstState = firstConsumer.crawlIndex();
    crawlSessionProvider.setup(firstState);
    plugins.runOnNewStatePlugins(firstConsumer.getContext(), firstState);
    executeConsumers(firstConsumer);
    return crawlSessionProvider.get();
  }
View Full Code Here

    assertThat(config.getCrawlRules().followExternalLinks(), is(true));
    assertThat(extract, hasSize(3));
  }

  private List<CandidateElement> extractFromTestFile(CandidateElementExtractor extractor) {
    StateVertex currentState = Mockito.mock(StateVertex.class);
    String file = "/candidateElementExtractorTest/domWithOneExternalAndTwoInternal.html";
    URL dom = Resources.getResource(getClass(), file);
    browser.goToUrl(dom);
    List<CandidateElement> extract = extractor.extract(currentState);
    return extract;
View Full Code Here

TOP

Related Classes of com.crawljax.core.state.StateVertex

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.