Package com.crawljax.cli

Source Code of com.crawljax.cli.JarRunner

package com.crawljax.cli;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.concurrent.TimeUnit;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.io.FileUtils;
import org.apache.commons.validator.routines.UrlValidator;

import ch.qos.logback.classic.Level;

import com.crawljax.browser.EmbeddedBrowser.BrowserType;
import com.crawljax.core.CrawljaxRunner;
import com.crawljax.core.CrawljaxException;
import com.crawljax.core.configuration.BrowserConfiguration;
import com.crawljax.core.configuration.CrawlRules;
import com.crawljax.core.configuration.CrawljaxConfiguration;
import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder;
import com.crawljax.plugins.crawloverview.CrawlOverview;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Charsets;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.io.Files;
import com.google.common.io.Resources;

public class JarRunner {

  static final String MISSING_ARGUMENT_MESSAGE =
          "Missing required argument URL and/or output folder.";

  static final String HELP_MESSAGE =
          "java -jar crawljax-cli-version.jar theUrl theOutputDir";

  static final String VERSION = "version";
  static final String VERBOSE = "verbose";
  static final String HELP = "help";
  static final String MAXSTATES = "maxstates";
  static final String DEPTH = "depth";
  static final String BROWSER = "browser";
  static final String PARALLEL = "parallel";
  static final String OVERRIDE = "override";
  static final String CRAWL_HIDDEN_ANCHORS = "crawlHiddenAnchors";
  static final String TIME_OUT = "timeout";
  static final String WAIT_AFTER_RELOAD = "waitAfterReload";
  static final String WAIT_AFTER_EVENT = "waitAfterEvent";
  static final String LOG_FILE = "log";

  static final String CLICK = "click";

  private static final int SPACES_AFTER_OPTION = 3;
  private static final int SPACES_BEFORE_OPTION = 5;
  private static final int ROW_WIDTH = 80;

  private final CommandLine commandLine;

  private final Options options;

  private final CrawljaxConfiguration config;

  /**
   * Main executable method of Crawljax CLI.
   *
   * @param args
   *            the arguments.
   */
  public static void main(String[] args) {
    try {
      JarRunner runner = new JarRunner(args);
      runner.runIfConfigured();
    } catch (NumberFormatException e) {
      System.err.println("Could not parse number " + e.getMessage());
      System.exit(1);
    } catch (RuntimeException e) {
      System.err.println(e.getMessage());
      System.exit(1);
    }
  }

  @VisibleForTesting
  JarRunner(String args[]) {
    this.options = getOptions();
    try {
      commandLine = new GnuParser().parse(options, args);
    } catch (ParseException e) {
      throw new IllegalArgumentException(e.getMessage(), e);
    }
    if (commandLine.hasOption(VERSION)) {
      System.out.println(getCrawljaxVersion());
      this.config = null;
    } else if (commandLine.getArgs().length == 2) {
      String url = commandLine.getArgs()[0];
      String outputDir = commandLine.getArgs()[1];
      checkUrlValidity(url);
      checkOutDir(outputDir);
      configureLogging();
      this.config = readConfig(url, outputDir);
    } else {
      if (!commandLine.hasOption(HELP)) {
        System.out.println(MISSING_ARGUMENT_MESSAGE);
      }
      printHelp();
      this.config = null;
    }
  }

  /**
   * Create the CML Options.
   *
   * @return Options expected from command-line.
   */
  private Options getOptions() {
    Options options = new Options();
    options.addOption("h", HELP, false, "print this message");
    options.addOption(VERSION, false, "print the version information and exit");

    options.addOption("b", "browser", true,
            "browser type: " + availableBrowsers() + ". Default is Firefox");

    options.addOption("d", DEPTH, true, "crawl depth level. Default is 2");

    options.addOption("s", MAXSTATES, true,
            "max number of states to crawl. Default is 0 (unlimited)");

    options.addOption("p", PARALLEL, true,
            "Number of browsers to use for crawling. Default is 1");
    options.addOption("o", OVERRIDE, false, "Override the output directory if non-empty");

    options.addOption("a", CRAWL_HIDDEN_ANCHORS, false,
            "Crawl anchors even if they are not visible in the browser.");

    options.addOption("t", TIME_OUT, true,
            "Specify the maximum crawl time in minutes");

    options.addOption(CLICK, true,
            "a comma separated list of HTML tags that should be clicked. Default is A and BUTTON");

    options.addOption(WAIT_AFTER_EVENT, true,
            "the time to wait after an event has been fired in milliseconds. Default is "
                    + CrawlRules.DEFAULT_WAIT_AFTER_EVENT);

    options.addOption(WAIT_AFTER_RELOAD, true,
            "the time to wait after an URL has been loaded in milliseconds. Default is "
                    + CrawlRules.DEFAULT_WAIT_AFTER_RELOAD);

    options.addOption("v", VERBOSE, false, "Be extra verbose");
    options.addOption(LOG_FILE, true, "Log to this file instead of the console");

    return options;
  }

  private String availableBrowsers() {
    return Joiner.on(", ").join(BrowserType.values());
  }

  private String getCrawljaxVersion() {
    try {
      return Resources
              .toString(JarRunner.class.getResource("/project.version"), Charsets.UTF_8);
    } catch (IOException e) {
      throw new CrawljaxException(e.getMessage(), e);
    }
  }

  private void printHelp() {
    String cmlSyntax = HELP_MESSAGE;
    final PrintWriter writer = new PrintWriter(System.out);
    final HelpFormatter helpFormatter = new HelpFormatter();
    helpFormatter.printHelp(writer, ROW_WIDTH, cmlSyntax, "", options, SPACES_AFTER_OPTION,
            SPACES_BEFORE_OPTION, "");
    writer.flush();
  }

  private void checkUrlValidity(String urlValue) {
    String[] schemes = { "http", "https" };
    if (urlValue == null || !new UrlValidator(schemes).isValid(urlValue)) {
      throw new IllegalArgumentException("provide a valid URL like http://example.com");
    }
  }

  private void checkOutDir(String outputDir) {
    File out = new File(outputDir);
    if (out.exists() && out.list().length > 0) {
      if (commandLine.hasOption(OVERRIDE)) {
        System.out.println("Overriding output directory...");
        try {
          FileUtils.deleteDirectory(out);
        } catch (IOException e) {
          throw new CrawljaxException(e.getMessage(), e);
        }
      } else {
        throw new IllegalStateException(
                "Output directory is not empty. If you want to override, use the -override option");
      }
    }
  }

  private void configureLogging() {
    if (commandLine.hasOption(VERBOSE)) {
      LogUtil.setCrawljaxLogLevel(Level.INFO);
    }
    if (commandLine.hasOption(LOG_FILE)) {
      File f = new File(commandLine.getOptionValue(LOG_FILE));
      try {
        if (!f.exists()) {
          Files.createParentDirs(f);
          Files.touch(f);
        }
      } catch (IOException e) {
        throw new CrawljaxException("Could not create log file: " + e.getMessage(), e);
      }
      Preconditions.checkArgument(f.canWrite());
      LogUtil.logToFile(f.getPath());
    }

  }

  private CrawljaxConfiguration readConfig(String urlValue, String outputDir) {
    CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(urlValue);

    BrowserType browser = BrowserType.firefox;
    if (commandLine.hasOption(BROWSER)) {
      String browserString = commandLine.getOptionValue(BROWSER);
      browser = getBrowserTypeFromStr(browserString);
    }

    int browsers = 1;
    if (commandLine.hasOption(PARALLEL)) {
      browsers = Integer.parseInt(commandLine.getOptionValue(PARALLEL));
    }
    builder.setBrowserConfig(new BrowserConfiguration(browser, browsers));

    if (commandLine.hasOption(DEPTH)) {
      String depth = commandLine.getOptionValue(DEPTH);
      builder.setMaximumDepth(Integer.parseInt(depth));
    }

    if (commandLine.hasOption(MAXSTATES)) {
      String maxstates = commandLine.getOptionValue(MAXSTATES);
      builder.setMaximumStates(Integer.parseInt(maxstates));
    }

    if (commandLine.hasOption(CRAWL_HIDDEN_ANCHORS)) {
      builder.crawlRules().crawlHiddenAnchors(true);
    }

    configureTimers(builder);

    builder.addPlugin(new CrawlOverview(new File(outputDir)));

    if (commandLine.hasOption(CLICK)) {
      builder.crawlRules().click(commandLine.getOptionValue(CLICK).split(","));
    } else {
      builder.crawlRules().clickDefaultElements();
    }

    return builder.build();
  }

  private void configureTimers(CrawljaxConfigurationBuilder builder) {
    if (commandLine.hasOption(TIME_OUT)) {
      long time = Long.parseLong(commandLine.getOptionValue(TIME_OUT));
      builder.setMaximumRunTime(time, TimeUnit.MINUTES);
    }
    if (commandLine.hasOption(WAIT_AFTER_EVENT)) {
      long time = Long.parseLong(commandLine.getOptionValue(WAIT_AFTER_EVENT));
      builder.crawlRules().waitAfterEvent(time, TimeUnit.MILLISECONDS);
    }
    if (commandLine.hasOption(WAIT_AFTER_RELOAD)) {
      long time = Long.parseLong(commandLine.getOptionValue(WAIT_AFTER_RELOAD));
      builder.crawlRules().waitAfterReloadUrl(time, TimeUnit.MILLISECONDS);
    }
  }

  private BrowserType getBrowserTypeFromStr(String browser) {
    if (browser != null) {
      for (BrowserType b : BrowserType.values()) {
        if (browser.equalsIgnoreCase(b.toString())) {
          return b;
        }
      }
    }
    throw new IllegalArgumentException("Unrecognized browser: '" + browser
            + "'. Available browsers are: " + availableBrowsers());
  }

  private void runIfConfigured() {
    if (config != null) {
      CrawljaxRunner runner = new CrawljaxRunner(config);
      runner.call();
    }
  }

  @VisibleForTesting
  CrawljaxConfiguration getConfig() {
    return config;
  }
}
TOP

Related Classes of com.crawljax.cli.JarRunner

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.