Package org.archive.modules.revisit

Examples of org.archive.modules.revisit.IdenticalPayloadDigestRevisit


    @Override
    protected void innerProcess(CrawlURI curi) throws InterruptedException {
        contentDigestHistory.load(curi);

        if (!curi.getContentDigestHistory().isEmpty()) {
          IdenticalPayloadDigestRevisit revisit =
              new IdenticalPayloadDigestRevisit(curi.getContentDigestSchemeString());
      revisit.setRefersToDate((String)curi.getContentDigestHistory().get(A_ORIGINAL_DATE));
      revisit.setRefersToTargetURI((String)curi.getContentDigestHistory().get(A_ORIGINAL_URL));
      String warcRecordId= (String)curi.getContentDigestHistory().get(A_WARC_RECORD_ID);
      if (warcRecordId!=null) {
        revisit.setRefersToRecordID(warcRecordId);
      }
      curi.setRevisitProfile(revisit);
            curi.getAnnotations().add("duplicate:digest");
        }
    }
View Full Code Here


            revisit.setETag((String) latestFetch.get(A_ETAG_HEADER));
            revisit.setLastModified((String) latestFetch.get(A_LAST_MODIFIED_HEADER));
            curi.setRevisitProfile(revisit);
        } else if (hasIdenticalDigest(curi)) {
            curi.getAnnotations().add("duplicate:digest");
            IdenticalPayloadDigestRevisit revisit =
                new IdenticalPayloadDigestRevisit((String)history[1].get(A_CONTENT_DIGEST));
            revisit.setRefersToTargetURI(curi.getURI()); // Matches are always on the same URI
            revisit.setRefersToDate((Long)history[1].get(A_FETCH_BEGAN_TIME));
            curi.setRevisitProfile(revisit);
        }
    }
View Full Code Here

TOP

Related Classes of org.archive.modules.revisit.IdenticalPayloadDigestRevisit

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.