Package net.sf.regain.crawler

Examples of net.sf.regain.crawler.Profiler


    } catch (Throwable thr) {
      mLog.error("Initializing preparator failed", thr);
    }

    mLog.info("Testing preparator " + prepName + "...");
    Profiler profiler = new Profiler(prepName, "docs");

    File typeDir = new File(docDir, fileType);
    File prepOutputDir = new File(outputDir, prepName);
    if (!prepOutputDir.exists()) {
      if (!prepOutputDir.mkdir()) {
        mLog.error("Could not create output dir: " + prepOutputDir.getAbsolutePath());
        System.exit(1);
      }
    }

    String sourceUrl = RegainToolkit.fileToUrl(typeDir);
    File[] docFileArr = typeDir.listFiles();
    if (docFileArr == null) {
      mLog.info("No test docs for preparator " + prepName + " found in " + typeDir.getAbsolutePath());
      return;
    }
    for (int i = 0; i < docFileArr.length; i++) {
      if (docFileArr[i].isFile()) {
        String url = RegainToolkit.fileToUrl(docFileArr[i]);
        mLog.info("Preparing document: " + url);
        try {
          RawDocument doc = new RawDocument(url, sourceUrl, null, null);

          profiler.startMeasuring();
          String content;
          try {
            prep.prepare(doc);
            content = prep.getCleanedContent();
            prep.cleanUp();
            profiler.stopMeasuring(docFileArr[i].length());
          } catch (Throwable thr) {
            profiler.abortMeasuring();
            throw thr;
          }

          File outFile = new File(prepOutputDir, docFileArr[i].getName() + ".txt");
          RegainToolkit.writeToFile(content, outFile);
View Full Code Here


    // Create a profiler for each preparator
    mPreparatorProfilerArr = new Profiler[mPreparatorArr.length];
    for (int i = 0; i < mPreparatorProfilerArr.length; i++) {
      String name = mPreparatorArr[i].getClass().getName();
      mPreparatorProfilerArr[i] = new Profiler("Preparator " + name, "docs");
    }

    // Create the CrawlerAccessController
    String accessClass = config.getCrawlerAccessControllerClass();
    if (accessClass != null) {
View Full Code Here

TOP

Related Classes of net.sf.regain.crawler.Profiler

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.