package org.apache.maven.linkcheck;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2003 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Maven" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Maven", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*
* ====================================================================
*/
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.maven.linkcheck.validation.LinkValidationItem;
import org.apache.maven.linkcheck.validation.LinkValidationResult;
import org.apache.maven.linkcheck.validation.LinkValidatorManager;
import org.dom4j.Document;
import org.dom4j.Node;
import org.dom4j.io.DOMReader;
import org.w3c.tidy.Tidy;
/**
 * A single file whose outgoing links are extracted and validated.
 *
 * <p>The file is parsed with JTidy, converted to a dom4j document, and the
 * values of <code>a/@href</code>, <code>img/@src</code>, <code>link/@href</code>
 * and <code>script/@src</code> are collected. Each distinct value is passed to
 * a {@link LinkValidatorManager} and the outcome is recorded as a
 * {@link LinkCheckResult}.</p>
 *
 * @author <a href="mailto:bwalding@apache.org">Ben Walding</a>
 * @version $Id: FileToCheck.java,v 1.11 2003/03/07 22:46:39 bwalding Exp $
 *
 */
public class FileToCheck
{
    /**
     * Log for debug output
     */
    private static final Log LOG = LogFactory.getLog(FileToCheck.class);

    /** Status constant whose value is <code>null</code>; not assigned anywhere in this class. */
    public static final String STATUS_UNKNOWN = null;

    /** Status set by {@link #check(LinkValidatorManager)} when the file could not be parsed. */
    public static final String STATUS_JTIDY_FAILURE = "Unable to tidy source";

    /** Status set by {@link #check(LinkValidatorManager)} when the file was parsed. */
    public static final String STATUS_OK = "OK";

    /** Directory against which {@link #getName()} relativises the file path. */
    private final File base;

    /** The file to parse and check. */
    private final File fileToCheck;

    private String status = STATUS_OK;
    private String message = "";

    /** Number of links reported as valid by the last check. */
    private int successful;

    /** Number of links reported as unknown or invalid by the last check. */
    private int unsuccessful;

    /** {@link LinkCheckResult} instances produced by the last check. */
    private List links = new ArrayList();

    /**
     * Creates a checker for one file.
     *
     * @param base base directory used to compute the relative name
     * @param fileToCheck the file to parse
     */
    public FileToCheck(File base, File fileToCheck)
    {
        this.base = base;
        this.fileToCheck = fileToCheck;
    }

    /**
     * Parses the file, validates every distinct link found in it and records
     * the results.
     *
     * <p>All state from a previous call (counters, status, message and the
     * result list) is reset first. If the file cannot be parsed, a single
     * result with status <code>PARSE FAILURE</code> is recorded, the status is
     * set to {@link #STATUS_JTIDY_FAILURE} and the method returns normally.</p>
     *
     * @param lvm the manager used to validate each link
     * @throws Exception if link extraction or validation fails; the exception
     *         is logged and rethrown unchanged
     */
    public void check(LinkValidatorManager lvm) throws Exception
    {
        successful = 0;
        unsuccessful = 0;
        status = STATUS_OK;
        message = "";
        // Drop results of an earlier run so they stay consistent with the counters.
        links.clear();

        Document doc;
        try
        {
            doc = parse();
        }
        catch (Throwable e)
        {
            //We catch Throwable, because there is a chance that the domReader will throw
            //a stack overflow exception for some files
            LOG.warn("Unable to parse " + fileToCheck + ": " + e);
            if (LOG.isDebugEnabled())
            {
                LOG.debug("Parse failure details for " + fileToCheck, e);
            }
            status = STATUS_JTIDY_FAILURE;
            message = e.toString();

            LinkCheckResult lcr = new LinkCheckResult();
            lcr.setStatus("PARSE FAILURE");
            lcr.setTarget("N/A");
            this.links.add(lcr);
            return;
        }

        try
        {
            validateLinks(doc, lvm);
        }
        catch (Exception e)
        {
            LOG.error("Error while checking links in " + fileToCheck, e);
            throw e;
        }
    }

    /**
     * Runs the file through JTidy and converts the resulting W3C DOM into a
     * dom4j document. The input stream is always closed.
     *
     * @return the parsed document
     * @throws Exception if the file cannot be opened or converted
     */
    private Document parse() throws Exception
    {
        Tidy tidy = new Tidy();
        FileInputStream in = new FileInputStream(fileToCheck);
        try
        {
            tidy.setMakeClean(true);
            tidy.setXmlTags(true);
            tidy.setXmlOut(true);
            tidy.setQuiet(true);
            tidy.setShowWarnings(false);
            // Capture JTidy diagnostics in memory so they can go to the debug log.
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            PrintWriter errOut = new PrintWriter(baos);
            tidy.setErrout(errOut);
            LOG.debug("Processing:" + fileToCheck);
            tidy.setXHTML(true);
            org.w3c.dom.Document domDocument = tidy.parseDOM(in, null);
            // now read a dom4j document from
            // JTidy's W3C DOM object
            DOMReader domReader = new DOMReader();
            Document doc = domReader.read(domDocument);
            // The writer buffers; flush so the captured output is complete.
            errOut.flush();
            LOG.debug(baos.toString());
            return doc;
        }
        finally
        {
            try
            {
                in.close();
            }
            catch (java.io.IOException ignored)
            {
                // Nothing useful can be done if closing a read-only stream fails.
                LOG.debug("Unable to close " + fileToCheck + ": " + ignored);
            }
        }
    }

    /**
     * Collects the distinct link targets from the document, validates each one
     * and appends a {@link LinkCheckResult} per target.
     *
     * @param doc the parsed document
     * @param lvm the manager used to validate each link
     * @throws Exception if validation of a link fails
     */
    private void validateLinks(Document doc, LinkValidatorManager lvm) throws Exception
    {
        List xpathResults = new ArrayList();
        xpathResults.addAll(doc.selectNodes("//a/@href"));
        xpathResults.addAll(doc.selectNodes("//img/@src"));
        //<link rel="stylesheet" href="...">
        xpathResults.addAll(doc.selectNodes("//link/@href"));
        //<script src="http://ar.atwola.com/file/adsWrapper.js">
        xpathResults.addAll(doc.selectNodes("//script/@src"));

        // Keyed by link text so that each distinct target is validated once.
        Map uniqueLinks = new HashMap();
        Iterator linkIter = xpathResults.iterator();
        while (linkIter.hasNext())
        {
            Node node = (Node) linkIter.next();
            String href = node.getText();
            uniqueLinks.put(href, href);
        }

        Iterator iter = uniqueLinks.keySet().iterator();
        while (iter.hasNext())
        {
            String href = (String) iter.next();
            LinkCheckResult lcr = new LinkCheckResult();
            LinkValidationItem lvi = new LinkValidationItem(fileToCheck, href);
            LinkValidationResult result = lvm.validateLink(lvi);
            lcr.setTarget(href);
            // Any other status code leaves the result's status unset and
            // touches neither counter.
            switch (result.getStatus())
            {
                case LinkValidationResult.UNKNOWN :
                    unsuccessful++;
                    lcr.setStatus("UNKNOWN REF");
                    break;
                case LinkValidationResult.VALID :
                    successful++;
                    lcr.setStatus("OK");
                    break;
                case LinkValidationResult.INVALID :
                    unsuccessful++;
                    lcr.setStatus("NOT FOUND");
                    break;
            }
            this.links.add(lcr);
        }
    }

    /**
     * Returns the message.
     * @return String
     */
    public String getMessage()
    {
        return message;
    }

    /**
     * Returns the status.
     * @return String
     */
    public String getStatus()
    {
        return status;
    }

    /**
     * Sets the message.
     * @param message The message to set
     */
    public void setMessage(String message)
    {
        this.message = message;
    }

    /**
     * Sets the status.
     * @param status The status to set
     */
    public void setStatus(String status)
    {
        this.status = status;
    }

    /**
     * Returns the results recorded by the last check.
     * @return the internal list of {@link LinkCheckResult} objects (not a copy)
     */
    public List getResults()
    {
        return links;
    }

    /**
     * Returns the successful.
     * @return int
     */
    public int getSuccessful()
    {
        return successful;
    }

    /**
     * Returns the unsuccessful.
     * @return int
     */
    public int getUnsuccessful()
    {
        return unsuccessful;
    }

    /**
     * Returns the path of the file relative to the base directory, using
     * forward slashes. If the file does not lie beneath the base directory the
     * absolute path is returned (also with forward slashes).
     *
     * @return the display name of the file
     */
    public String getName()
    {
        String baseName = base.getAbsolutePath();
        String fileName = fileToCheck.getAbsolutePath();
        if (fileName.startsWith(baseName))
        {
            int prefix = baseName.length();
            if (baseName.endsWith(File.separator))
            {
                // e.g. a filesystem root: the separator is already part of the prefix.
                fileName = fileName.substring(prefix);
            }
            else if (fileName.length() > prefix && fileName.charAt(prefix) == File.separatorChar)
            {
                // Only strip when the prefix ends on a path boundary, so that
                // "/a/b" is not treated as a parent of "/a/bc/x.html".
                fileName = fileName.substring(prefix + 1);
            }
        }
        fileName = fileName.replace('\\', '/');
        return fileName;
    }

    /**
     * Renders this file and its link results as an XML fragment.
     *
     * @return a <code>&lt;file&gt;</code> element containing the name, the
     *         counters and the XML of every result
     */
    public String toXML()
    {
        StringBuffer buf = new StringBuffer();
        buf.append(" <file>\n");
        buf.append(" <name>" + escapeXml(getName()) + "</name>\n");
        buf.append(" <successful>" + getSuccessful() + "</successful>\n");
        buf.append(" <unsuccessful>" + getUnsuccessful() + "</unsuccessful>\n");
        Iterator iter = getResults().iterator();
        while (iter.hasNext())
        {
            LinkCheckResult result = (LinkCheckResult) iter.next();
            buf.append(result.toXML());
        }
        buf.append(" </file>\n");
        return buf.toString();
    }

    /**
     * Escapes the characters that are not allowed verbatim in XML element
     * content.
     *
     * @param text the raw text
     * @return the text with <code>&amp;</code>, <code>&lt;</code> and
     *         <code>&gt;</code> replaced by entity references
     */
    private static String escapeXml(String text)
    {
        StringBuffer escaped = new StringBuffer(text.length());
        for (int i = 0; i < text.length(); i++)
        {
            char c = text.charAt(i);
            switch (c)
            {
                case '&' :
                    escaped.append("&amp;");
                    break;
                case '<' :
                    escaped.append("&lt;");
                    break;
                case '>' :
                    escaped.append("&gt;");
                    break;
                default :
                    escaped.append(c);
                    break;
            }
        }
        return escaped.toString();
    }
}