Package org.apache.maven.linkcheck

Source Code of org.apache.maven.linkcheck.FileToCheck

package org.apache.maven.linkcheck;

/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2003 The Apache Software Foundation.  All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in
*    the documentation and/or other materials provided with the
*    distribution.
*
* 3. The end-user documentation included with the redistribution,
*    if any, must include the following acknowledgment:
*       "This product includes software developed by the
*        Apache Software Foundation (http://www.apache.org/)."
*    Alternately, this acknowledgment may appear in the software itself,
*    if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
*    "Apache Maven" must not be used to endorse or promote products
*    derived from this software without prior written permission. For
*    written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
*    "Apache Maven", nor may "Apache" appear in their name, without
*    prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation.  For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*
* ====================================================================
*/

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.maven.linkcheck.validation.LinkValidationItem;
import org.apache.maven.linkcheck.validation.LinkValidationResult;
import org.apache.maven.linkcheck.validation.LinkValidatorManager;
import org.dom4j.Document;
import org.dom4j.Node;
import org.dom4j.io.DOMReader;
import org.w3c.tidy.Tidy;

/**
* @author <a href="mailto:bwalding@apache.org">Ben Walding</a>
* @version $Id: FileToCheck.java,v 1.11 2003/03/07 22:46:39 bwalding Exp $
*
*/
public class FileToCheck
{
    /**
     * Log for debug output
     */
    private static Log LOG = LogFactory.getLog(FileToCheck.class);

    private File base;
    private File fileToCheck;
    private String status = STATUS_OK;
    private String message = "";
    private int successful;
    private int unsuccessful;

    public static final String STATUS_UNKNOWN = null;
    public static final String STATUS_JTIDY_FAILURE = "Unable to tidy source";
    public static final String STATUS_OK = "OK";

    public FileToCheck(File base, File fileToCheck)
    {
        this.base = base;
        this.fileToCheck = fileToCheck;
    }

    private List links = new ArrayList();

    public void check(LinkValidatorManager lvm) throws Exception
    {
        successful = 0;
        unsuccessful = 0;
        status = STATUS_OK;
        message = "";

        try
        {
            Tidy tidy = new Tidy();
            Document doc = null;

            try
            {
                FileInputStream in = new FileInputStream(fileToCheck);
                tidy.setMakeClean(true);
                tidy.setXmlTags(true);
                tidy.setXmlOut(true);
                tidy.setQuiet(true);
                tidy.setShowWarnings(false);
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                PrintWriter errOut = new PrintWriter(baos);
                tidy.setErrout(errOut);
                LOG.debug("Processing:" + fileToCheck);
                tidy.setXHTML(true);
                org.w3c.dom.Document domDocument = tidy.parseDOM(in, null);

                // now read a dom4j document from
                // JTidy's W3C DOM object

                DOMReader domReader = new DOMReader();
                doc = domReader.read(domDocument);

                LOG.debug(baos.toString());
            }
            catch (Throwable e)
            {
                //We catch Throwable, because there is a chance that the domReader will throw
                //a stack overflow exception for some files
                if (!(e instanceof Exception)) {
                    LOG.info("Caught " + e.toString());
                }
                LinkCheckResult lcr = new LinkCheckResult();
                lcr.setStatus("PARSE FAILURE");
                lcr.setTarget("N/A");
                this.links.add(lcr);
                return;
            }

            List xpathResults = new ArrayList();

            xpathResults.addAll(doc.selectNodes("//a/@href"));
            xpathResults.addAll(doc.selectNodes("//img/@src"));
            //<link rel="stylesheet" href="...">
            xpathResults.addAll(doc.selectNodes("//link/@href"));
            //<script src="http://ar.atwola.com/file/adsWrapper.js">
            xpathResults.addAll(doc.selectNodes("//script/@src"));

            Map uniqueLinks = new HashMap();
            Iterator linkIter = xpathResults.iterator();
            while (linkIter.hasNext())
            {
                Node node = (Node) linkIter.next();
                String href = node.getText();
                uniqueLinks.put(href, href);
            }

            Iterator iter = uniqueLinks.keySet().iterator();
            while (iter.hasNext())
            {
                String href = (String) iter.next();

                //System.out.println("Link Found: " + href);

                LinkCheckResult lcr = new LinkCheckResult();

                LinkValidationItem lvi = new LinkValidationItem(fileToCheck, href);
                LinkValidationResult result = lvm.validateLink(lvi);
                lcr.setTarget(href);

                switch (result.getStatus())
                {
                    case LinkValidationResult.UNKNOWN :
                        unsuccessful++;
                        lcr.setStatus("UNKNOWN REF");
                        break;
                    case LinkValidationResult.VALID :
                        successful++;
                        lcr.setStatus("OK");
                        break;
                    case LinkValidationResult.INVALID :
                        unsuccessful++;
                        lcr.setStatus("NOT FOUND");
                        break;
                }

                this.links.add(lcr);
            }
        }
        catch (Exception e)
        {
            System.err.println(message);
            throw (e);
        }
    }

    /**
     * Returns the message.
     * @return String
     */
    public String getMessage()
    {
        return message;
    }

    /**
     * Returns the status.
     * @return int
     */
    public String getStatus()
    {
        return status;
    }

    /**
     * Sets the message.
     * @param message The message to set
     */
    public void setMessage(String message)
    {
        this.message = message;
    }

    /**
     * Sets the status.
     * @param status The status to set
     */
    public void setStatus(String status)
    {
        this.status = status;
    }

    public List getResults()
    {
        return links;
    }

    /**
     * Returns the successful.
     * @return int
     */
    public int getSuccessful()
    {
        return successful;
    }

    /**
     * Returns the unsuccessful.
     * @return int
     */
    public int getUnsuccessful()
    {
        return unsuccessful;
    }

    public String getName()
    {
        String baseName = base.getAbsolutePath();
        String fileName = fileToCheck.getAbsolutePath();
        if (fileName.startsWith(baseName))
            fileName = fileName.substring(baseName.length() + 1);

        fileName = fileName.replace('\\', '/');
        return fileName;
    }

    public String toXML()
    {
        StringBuffer buf = new StringBuffer();

        buf.append("  <file>\n");
        buf.append("    <name>" + getName() + "</name>\n");
        buf.append("    <successful>" + getSuccessful() + "</successful>\n");
        buf.append("    <unsuccessful>" + getUnsuccessful() + "</unsuccessful>\n");

        Iterator iter = getResults().iterator();
        while (iter.hasNext())
        {
            LinkCheckResult result = (LinkCheckResult) iter.next();
            buf.append(result.toXML());
        }

        buf.append("  </file>\n");

        return buf.toString();
    }

}
TOP

Related Classes of org.apache.maven.linkcheck.FileToCheck

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.