Package com.crawljax.core.configuration.CrawljaxConfiguration

Examples of com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder


    }

  }

  private CrawljaxConfiguration readConfig(String urlValue, String outputDir) {
    CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(urlValue);

    builder.setOutputDirectory(new File(outputDir));

    BrowserType browser = BrowserType.FIREFOX;
    if (options.specifiesBrowser()) {
      browser = options.getSpecifiedBrowser();
    }

    int browsers = 1;
    if (options.specifiesParallelBrowsers()) {
      browsers = options.getSpecifiedNumberOfBrowsers();
    }
    if (browser == BrowserType.REMOTE) {
      String remoteUrl = options.getSpecifiedRemoteBrowser();
      builder.setBrowserConfig(BrowserConfiguration.remoteConfig(browsers, remoteUrl));
    } else {
      builder.setBrowserConfig(new BrowserConfiguration(browser, browsers));
    }

    if (options.specifiesDepth()) {
      builder.setMaximumDepth(options.getSpecifiedDepth());
    }

    if (options.specifiesMaxStates()) {
      builder.setMaximumStates(options.getMaxStates());
    }

    if (options.requestsCrawlHiddenAnchors()) {
      builder.crawlRules().crawlHiddenAnchors(true);
    }

    configureTimers(builder);

    builder.addPlugin(new CrawlOverview());

    if (options.specifiesClickElements()) {
      builder.crawlRules().click(options.getSpecifiedClickElements());
    } else {
      builder.crawlRules().clickDefaultElements();
    }

    return builder.build();
  }
View Full Code Here


    }

  }

  private CrawljaxConfiguration readConfig(String urlValue, String outputDir) {
    CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(urlValue);

    BrowserType browser = BrowserType.firefox;
    if (commandLine.hasOption(BROWSER)) {
      String browserString = commandLine.getOptionValue(BROWSER);
      browser = getBrowserTypeFromStr(browserString);
    }

    int browsers = 1;
    if (commandLine.hasOption(PARALLEL)) {
      browsers = Integer.parseInt(commandLine.getOptionValue(PARALLEL));
    }
    builder.setBrowserConfig(new BrowserConfiguration(browser, browsers));

    if (commandLine.hasOption(DEPTH)) {
      String depth = commandLine.getOptionValue(DEPTH);
      builder.setMaximumDepth(Integer.parseInt(depth));
    }

    if (commandLine.hasOption(MAXSTATES)) {
      String maxstates = commandLine.getOptionValue(MAXSTATES);
      builder.setMaximumStates(Integer.parseInt(maxstates));
    }

    if (commandLine.hasOption(CRAWL_HIDDEN_ANCHORS)) {
      builder.crawlRules().crawlHiddenAnchors(true);
    }

    configureTimers(builder);

    builder.addPlugin(new CrawlOverview(new File(outputDir)));

    if (commandLine.hasOption(CLICK)) {
      builder.crawlRules().click(commandLine.getOptionValue(CLICK).split(","));
    } else {
      builder.crawlRules().clickDefaultElements();
    }

    return builder.build();
  }
View Full Code Here

   *
   * @throws IOException
   *             when the output folder cannot be created or emptied.
   */
  public static void main(String[] args) throws IOException {
    CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(URL);
    builder.crawlRules().insertRandomDataInInputForms(false);

    // click these elements
    builder.crawlRules().clickDefaultElements();
    builder.crawlRules().click("div").withAttribute("class", "clickable");

    // but don't click these
    builder.crawlRules().dontClick("a").withAttribute("class", "ignore");
    builder.crawlRules().dontClick("a").underXPath("//DIV[@id='footer']");

    // Set timeouts
    builder.crawlRules().waitAfterReloadUrl(WAIT_TIME_AFTER_RELOAD, TimeUnit.MILLISECONDS);
    builder.crawlRules().waitAfterEvent(WAIT_TIME_AFTER_EVENT, TimeUnit.MILLISECONDS);

    // Add a condition that this XPath doesn't exits
    builder.crawlRules().addCrawlCondition("No spans with foo as class",
            new NotXPathCondition("//*[@class='foo']"));

    // Set some input for fields
    builder.crawlRules().setInputSpec(getInputSpecification());

    // This will generate a nice output in the output directory.
    File outFolder = new File("output");
    if (outFolder.exists()) {
      FileUtils.deleteDirectory(outFolder);
    }
    builder.setOutputDirectory(outFolder);
    builder.addPlugin(new CrawlOverview());

    // We want to use two browsers simultaneously.
    builder.setBrowserConfig(new BrowserConfiguration(BrowserType.FIREFOX, 2));

    CrawljaxRunner crawljax = new CrawljaxRunner(builder.build());
    crawljax.call();

  }
View Full Code Here

  /**
   * Entry point
   */
  public static void main(String[] args) {
    CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(URL);
    builder.crawlRules().insertRandomDataInInputForms(false);

    builder.crawlRules().click("a");
    builder.crawlRules().click("button");

    // except these
    builder.crawlRules().dontClick("a").underXPath("//DIV[@id='guser']");
    builder.crawlRules().dontClick("a").withText("Language Tools");

    // limit the crawling scope
    builder.setMaximumStates(MAX_NUMBER_STATES);
    builder.setMaximumDepth(MAX_DEPTH);

    builder.crawlRules().setInputSpec(getInputSpecification());

    CrawljaxRunner crawljax = new CrawljaxRunner(builder.build());
    crawljax.call();

  }
View Full Code Here

  /**
   * Entry point
   */
  public static void main(String[] args) {
    CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(URL);
    builder.crawlRules().clickDefaultElements();
    builder.crawlRules().dontClick(ALL_ANCHORS).underXPath(HEADER_XPATH);
    builder.crawlRules().dontClick(ALL_ANCHORS).withText(LANGUAGE_TOOLS);

    // limit the crawling scope
    builder.setMaximumStates(MAX_STATES);
    builder.setMaximumDepth(MAX_CRAWL_DEPTH);

    builder.crawlRules().setInputSpec(getInputSpecification());

    CrawljaxRunner crawljax = new CrawljaxRunner(builder.build());
    crawljax.call();
  }
View Full Code Here

  /**
   * entry point
   */
  public static void main(String[] args) {
    CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(URL);
    builder.crawlRules().insertRandomDataInInputForms(false);

    builder.crawlRules().click("a");

    // click these elements
    builder.crawlRules().clickDefaultElements();
    builder.crawlRules().click("div").withAttribute("class", "clickable");

    // but don't click these
    builder.crawlRules().dontClick("a").withAttribute("class", "ignore");
    builder.crawlRules().dontClick("a").underXPath("//DIV[@id='footer']");

    // Set timeouts
    builder.crawlRules().waitAfterReloadUrl(WAIT_TIME_AFTER_RELOAD, TimeUnit.MILLISECONDS);
    builder.crawlRules().waitAfterEvent(WAIT_TIME_AFTER_EVENT, TimeUnit.MILLISECONDS);

    // Add a condition that this XPath doesn't exits
    builder.crawlRules().addCrawlCondition("No spans with foo as class",
            new NotXPathCondition(
                    "//*[@class='foo']"));

    // Set some input for fields
    builder.crawlRules().setInputSpec(getInputSpecification());

    // This will generate a nice output in the output directory.
    builder.addPlugin(new CrawlOverview(new File("output")));

    CrawljaxRunner crawljax = new CrawljaxRunner(builder.build());
    crawljax.call();

  }
View Full Code Here

   *
   * @throws IOException
   *             when the output folder cannot be created or emptied.
   */
  public static void main(String[] args) throws IOException {
    CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(URL);
    builder.crawlRules().insertRandomDataInInputForms(false);

    // click these elements
    builder.crawlRules().clickDefaultElements();
    builder.crawlRules().click("div").withAttribute("class", "clickable");

    // but don't click these
    builder.crawlRules().dontClick("a").withAttribute("class", "ignore");
    builder.crawlRules().dontClick("a").underXPath("//DIV[@id='footer']");

    // Set timeouts
    builder.crawlRules().waitAfterReloadUrl(WAIT_TIME_AFTER_RELOAD, TimeUnit.MILLISECONDS);
    builder.crawlRules().waitAfterEvent(WAIT_TIME_AFTER_EVENT, TimeUnit.MILLISECONDS);

    // Add a condition that this XPath doesn't exits
    builder.crawlRules().addCrawlCondition("No spans with foo as class",
            new NotXPathCondition(
                    "//*[@class='foo']"));

    // Set some input for fields
    builder.crawlRules().setInputSpec(getInputSpecification());

    // This will generate a nice output in the output directory.
    File outFolder = new File("output");
    if (outFolder.exists()) {
      FileUtils.deleteDirectory(outFolder);
    }
    builder.addPlugin(new CrawlOverview(outFolder));

    // We want to use two browsers simultaneously.
    builder.setBrowserConfig(new BrowserConfiguration(BrowserType.firefox, 2));

    CrawljaxRunner crawljax = new CrawljaxRunner(builder.build());
    crawljax.call();

  }
View Full Code Here

  private static final Logger LOG = LoggerFactory.getLogger(PluginExample.class);

  public static void main(String[] args) {

    CrawljaxConfigurationBuilder builder =
            CrawljaxConfiguration.builderFor("http://demo.crawljax.com/");
    builder.addPlugin(new OnNewStatePlugin() {

      @Override
      public void onNewState(CrawlerContext context, StateVertex newState) {
        // This will print the DOM when a new state is detected. You should see it in your
        // console.
        LOG.info("Sound a new dom! Here it is:\n{}", context.getBrowser().getDom());
      }

      @Override
      public String toString() {
        return "Our example plugin";
      }
    });
    CrawljaxRunner crawljax = new CrawljaxRunner(builder.build());
    crawljax.call();
  }
View Full Code Here

TOP

Related Classes of com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.