Package org.archive.modules

Examples of org.archive.modules.ProcessResult


    @Override
    protected ProcessResult innerProcessResult(CrawlURI curi) {
        synchronized (this) {
            contentSinceCheck += curi.getContentSize();
            if (contentSinceCheck/1024 > getRecheckThresholdKb()) {
                ProcessResult r = checkAvailableSpace(curi);
                contentSinceCheck = 0;
                return r;
            } else {
                return ProcessResult.PROCEED;
            }
View Full Code Here


    fetchHistory[1] = new HashMap<String, Object>();
    fetchHistory[1].put(FetchHistoryHelper.A_TIMESTAMP, expected_ts - 2000);
    fetchHistory[1].put(RecrawlAttributeConstants.A_CONTENT_DIGEST,
        CONTENT_DIGEST_SCHEME + Base32.encode(digestValue1));
   
    ProcessResult result = t.innerProcessResult(curi);
    assertEquals("result is PROCEED", ProcessResult.PROCEED, result);
   
    // newly loaded history entry should fall in between two existing entries (index=1)
    Map<String, Object> history = getFetchHistory(curi, 1);
    assertNotNull("history", history);
View Full Code Here

 
  public void testInnerProcessResultSingleShotWithRealServer() throws Exception {
    WbmPersistLoadProcessor t = new WbmPersistLoadProcessor();
    //CrawlURI curi = new CrawlURI(UURIFactory.getInstance("http://archive.org/"));
    CrawlURI curi = new CrawlURI(UURIFactory.getInstance("http://www.mext.go.jp/null.gif"));
    ProcessResult result = t.innerProcessResult(curi);
    Map<String, Object> history = getFetchHistory(curi, 0);
    assertNotNull("getFetchHistory returns non-null", history);
    String hash = (String)history.get(RecrawlAttributeConstants.A_CONTENT_DIGEST);
    assertNotNull("CONTENT_DIGEST is non-null", hash);
    assertTrue("CONTENT_DIGEST starts with scheme", hash.startsWith(t.getContentDigestScheme()));
View Full Code Here

            String whoisServer = getWhoisServer(curi);
            String whoisQuery = getWhoisQuery(curi);

            if (whoisServer == null) {
                // e.g. whois:foo.org
                ProcessResult ret = deferOrFinishGeneric(curi, whoisQuery);
                return ret;
            } else {
                // e.g. whois://whois.pir.org/foo.org
                fetch(curi, whoisServer, whoisQuery);
                return ProcessResult.PROCEED;
View Full Code Here

TOP

Related Classes of org.archive.modules.ProcessResult

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.