linkCount = translatedLinks.size();
}
try {
// Process URI
DelayedOutputStream output = new DelayedOutputStream();
try {
List gatheredLinks;
if (!target.confirmExtensions() && target.followLinks() && this.isCrawlablePage(target)) {
gatheredLinks = new ArrayList();
} else {
gatheredLinks = null;
}
final TreeMap headers = new TreeMap();
headers.put("user-agent", this.userAgent);
headers.put("accept", this.accept);
status =
this.getPage(
target.getDeparameterizedSourceURI(),
this.getLastModified(target),
target.getParameters(),
headers,
target.confirmExtensions() ? translatedLinks : null,
gatheredLinks,
output);
if (status >= 400) {
throw new ProcessingException(
"Resource not found: " + status);
}
if (gatheredLinks != null) {
for (Iterator it = gatheredLinks.iterator();it.hasNext();) {
String linkURI = (String) it.next();
Target linkTarget = target.getDerivedTarget(linkURI);
if (linkTarget == null) {
this.pageSkipped(linkURI, "link does not share same root as parent");
continue;
}
if (!this.isIncluded(linkTarget.getSourceURI())) {
this.pageSkipped(linkTarget.getSourceURI(), "matched include/exclude rules");
continue;
}
if (crawler.addTarget(linkTarget)) {
newLinkCount++;
}
}
linkCount = gatheredLinks.size();
}
} catch (ProcessingException pe) {
output.close();
output = null;
this.resourceUnavailable(target);
this.sendBrokenLinkWarning(target.getSourceURI(),
DefaultNotifyingBuilder.getRootCause(pe).getMessage());
} finally {
if (output != null && status != -1) {
ModifiableSource source = this.getSource(target);
try {
pageSize = output.size();
if (this.checksumsURI == null || !this.isSameContent(output, target)) {
OutputStream stream = source.getOutputStream();
output.setFileOutputStream(stream);
output.flush();
output.close();
this.pageGenerated(target.getSourceURI(),
target.getAuthlessDestURI(),
pageSize,
linkCount,
newLinkCount,
crawler.getRemainingCount(),
crawler.getProcessedCount(),
System.currentTimeMillis()- startTimeMillis);
} else {
output.close();
this.pageSkipped(target.getSourceURI(), "Page not changed");
}
} catch (IOException ioex) {
this.log.warn(ioex.toString());
} finally {