Package org.apache.nutch.crawl

Examples of org.apache.nutch.crawl.CrawlDb.update()


        }

        paths.add(p);
      }
       
      cdb.update(od.getCrawlDb(), paths.toArray(new Path[paths.size()]),
        true, true);
    }
    else
    {
      Path[] allSegments = getSegments(od);
View Full Code Here


    else
    {
      Path[] allSegments = getSegments(od);
       
      // This just does the last segment created.
      cdb.update(od.getCrawlDb(),
        new Path[] {allSegments[allSegments.length - 1]}, true, true);
    }
  }

  protected Path [] getSegments(final OutputDirectories od)
View Full Code Here

                if (generatedSegment == null) {
                    logger.info("Stopping at depth=" + i + " - no more URLs to fetch.");
                    break;
                }
                fetcher.fetch(generatedSegment, threads, true);
                crawlDbTool.update(crawldbPath, new Path[] { generatedSegment }, true, true);
            }
        } catch (IOException e) {
            logger.error("Exception while crawling", e);
        }
    }
View Full Code Here

        parseSegment.parse(segs[0]);    // parse it, if needed
        delta = System.currentTimeMillis() - start;
        res.addTiming("parse", i + "", delta);
      }
      start = System.currentTimeMillis();
      crawlDbTool.update(crawlDb, segs, true, true); // update crawldb
      delta = System.currentTimeMillis() - start;
      res.addTiming("update", i + "", delta);
      start = System.currentTimeMillis();
      linkDbTool.invert(linkDb, segs, true, true, false); // invert links
      delta = System.currentTimeMillis() - start;
View Full Code Here

        parseSegment.parse(segs[0]);    // parse it, if needed
        delta = System.currentTimeMillis() - start;
        res.addTiming("parse", i + "", delta);
      }
      start = System.currentTimeMillis();
      crawlDbTool.update(crawlDb, segs, true, true); // update crawldb
      delta = System.currentTimeMillis() - start;
      res.addTiming("update", i + "", delta);
      start = System.currentTimeMillis();
      linkDbTool.invert(linkDb, segs, true, true, false); // invert links
      delta = System.currentTimeMillis() - start;
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.