/**
* License Agreement.
*
* Rich Faces - Natural Ajax for Java Server Faces (JSF)
*
* Copyright (C) 2007 Exadel, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1 as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
package org.ajax4jsf.webapp.tidy;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.Writer;
import java.util.LinkedList;
import java.util.Map;
import java.util.Properties;
import org.ajax4jsf.Messages;
import org.ajax4jsf.application.AjaxViewHandler;
import org.ajax4jsf.context.SAXEndElementEvent;
import org.ajax4jsf.context.SAXStartElementEvent;
import org.ajax4jsf.context.SAXTextEvent;
import org.ajax4jsf.org.w3c.tidy.Lexer;
import org.ajax4jsf.org.w3c.tidy.Node;
import org.ajax4jsf.org.w3c.tidy.Tidy;
import org.ajax4jsf.org.w3c.tidy.TidyMessage;
import org.ajax4jsf.org.w3c.tidy.TidyMessageListener;
import org.ajax4jsf.webapp.HtmlParser;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
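/**
* Tidy-based implementation of {@link HtmlParser}: parses markup into a DOM
* tree, substitutes the view-state markers, inserts the recorded head events
* into the document head and pretty-prints the result.
*
* @author asmirnov@exadel.com (latest modification by $Author: alexsmirnov $)
* @version $Revision: 1.1.2.1 $ $Date: 2007/01/09 18:59:17 $
*
*/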
public class TidyParser implements TidyMessageListener, HtmlParser {

    private static final Log log = LogFactory.getLog(TidyParser.class);

    /** Mime type prefixes printed as ordinary HTML. */
    private static final String[] _htmlTypes = { "text/html" };

    /** Mime type prefixes printed as XML. */
    private static final String[] _xmlTypes = { "text/xml" };

    /** Mime type prefixes printed as XHTML. */
    private static final String[] _xhtmlTypes = { "application/xhtml+xml" };

    /** Delimiter placed around a Tidy message when it is stored as a comment. */
    private static final String TIDY_MARK = "[TIDY_MESSAGE]";

    /**
     * Switch for appending Tidy messages to the parsed tree as comments. The
     * feature is disabled; the code is kept until the output becomes
     * configurable.
     */
    private static final boolean INSERT_MESSAGES_INTO_TREE = false;

    private final Tidy tidy;

    /** Serialized view state that replaces the state markers, or null. */
    private String _viewState;

    private String _encoding;

    private String _outputEncoding;

    /** SAX-like events describing elements to insert into the head, or null. */
    private Object[] headEvents;

    /**
     * Creates a parser configured from the given properties. Tidy error output
     * is discarded because messages are delivered through
     * {@link #messageReceived(TidyMessage)} instead.
     *
     * @param props Tidy configuration properties
     */
    public TidyParser(Properties props) {
        tidy = new Tidy();
        tidy.setConfigurationFromProps(props);
        PrintWriter errout = new PrintWriter(new ErrorWriter());
        tidy.setErrout(errout);
        tidy.setForceOutput(true);
        tidy.setHideEndTags(false);
        tidy.setMessageListener(this);
    }

    /**
     * Parses the input with Tidy, post-processes the resulting DOM (state
     * markers and head events) and, when an output is given, pretty-prints the
     * document to it.
     *
     * @param input
     *            source passed as is to Tidy (the callers in this class pass
     *            an {@link InputStream} or a {@link Reader})
     * @param output
     *            writer for the serialized document, or null to skip printing
     * @return the parsed document, or null if Tidy did not produce one
     * @throws IOException
     *             declared for the callers; propagated from the underlying
     *             calls if they raise it
     */
    public Document parseHtmlByTidy(Object input, Writer output)
            throws IOException {
        Document document = tidy.parseDOM(input, null);
        if (null != document) {
            Element documentElement = document.getDocumentElement();
            if (null != documentElement) {
                // Replace state elements with real stored.
                replaceStateMarkers(document, documentElement);
                // Inserts scripts and styles to head.
                if ((null != headEvents && headEvents.length > 0)
                        || null != _viewState) {
                    insertHeadEvents(document, documentElement);
                }
            }
            if (null != output) {
                tidy.pprint(document, output);
            }
        }
        return document;
    }

    /**
     * Replaces every state marker span with the stored view state, or unwraps
     * the marker (keeping its content) when no view state is set.
     */
    private void replaceStateMarkers(Document document, Element documentElement) {
        NodeList spans = documentElement.getElementsByTagName("span");
        // Take a snapshot first: DOM node lists are live, so removing or
        // replacing a span while walking the list by index would skip the
        // element that follows each processed marker.
        Element[] candidates = new Element[spans.getLength()];
        for (int i = 0; i < candidates.length; i++) {
            candidates[i] = (Element) spans.item(i);
        }
        for (int i = 0; i < candidates.length; i++) {
            Element node = candidates[i];
            if (!isStateMarker(node)) {
                continue;
            }
            org.w3c.dom.Node parentNode = node.getParentNode();
            if (null != _viewState) {
                // State marker - replace with real.
                parentNode.replaceChild(
                        document.createCDATASection(_viewState), node);
            } else {
                // Remove marker element, but keep its content.
                if (node.hasChildNodes()) {
                    moveChildrenBefore(node, parentNode, node.getNextSibling());
                }
                parentNode.removeChild(node);
            }
        }
    }

    /** Tells whether both the id and the name of the element equal the state marker key. */
    private static boolean isStateMarker(Element node) {
        return AjaxViewHandler.STATE_MARKER_KEY.equals(node.getAttribute("id"))
                && AjaxViewHandler.STATE_MARKER_KEY.equals(node
                        .getAttribute("name"));
    }

    /** Moves all children of the source node into the parent, in order, before the reference node. */
    private static void moveChildrenBefore(org.w3c.dom.Node source,
            org.w3c.dom.Node parentNode, org.w3c.dom.Node reference) {
        NodeList childNodes = source.getChildNodes();
        // Copy all nodes to a temporary array, since moving nodes during the
        // iteration modifies the NodeList.
        org.w3c.dom.Node[] childArray = new org.w3c.dom.Node[childNodes
                .getLength()];
        for (int j = 0; j < childArray.length; j++) {
            childArray[j] = childNodes.item(j);
        }
        for (int j = 0; j < childArray.length; j++) {
            parentNode.insertBefore(childArray[j], reference);
        }
    }

    /**
     * Makes sure the document has a head element and replays the recorded head
     * events into it.
     */
    private void insertHeadEvents(Document document, Element documentElement) {
        // find head
        org.w3c.dom.Node head = documentElement.getElementsByTagName("head")
                .item(0);
        // Insert empty if not found
        if (null == head) {
            head = document.createElement("head");
            documentElement.insertBefore(head, documentElement.getFirstChild());
        }
        if (null == headEvents) {
            // Reached when only the view state is set: nothing to replay.
            return;
        }
        org.w3c.dom.Node insertionPoint = findHeadInsertionPoint(head);
        // Stack of currently open elements; the head of the list is the
        // innermost one.
        LinkedList<Element> nodesStack = new LinkedList<Element>();
        for (Object event : headEvents) {
            if (event instanceof SAXStartElementEvent) {
                SAXStartElementEvent startElement = (SAXStartElementEvent) event;
                Element element = document.createElement(startElement.getName());
                for (Map.Entry<String, String> attributeEntry : startElement
                        .getAttributes().entrySet()) {
                    element.setAttribute(attributeEntry.getKey(),
                            attributeEntry.getValue());
                }
                if (nodesStack.isEmpty()) {
                    head.insertBefore(element, insertionPoint);
                } else {
                    nodesStack.peek().appendChild(element);
                }
                nodesStack.addFirst(element);
            } else if (event instanceof SAXTextEvent) {
                SAXTextEvent textEvent = (SAXTextEvent) event;
                org.w3c.dom.Node text = document.createTextNode(textEvent
                        .getText());
                Element parent = nodesStack.peek();
                if (null == parent) {
                    // Text outside of any open element goes directly to head
                    // instead of failing on a missing parent.
                    head.insertBefore(text, insertionPoint);
                } else {
                    parent.appendChild(text);
                }
            } else if (event instanceof SAXEndElementEvent) {
                nodesStack.removeFirst();
            } else {
                throw new IllegalArgumentException(
                        "Unsupported head event: "
                                + (null == event ? "null" : event.getClass()
                                        .getName()));
            }
        }
    }

    /**
     * Returns the first element child of head that is neither title nor base,
     * or null when there is none (new nodes are then appended at the end).
     */
    private static org.w3c.dom.Node findHeadInsertionPoint(org.w3c.dom.Node head) {
        NodeList headChildren = head.getChildNodes();
        for (int i = 0; i < headChildren.getLength(); i++) {
            org.w3c.dom.Node child = headChildren.item(i);
            if (child instanceof Element) {
                String nodeName = child.getNodeName();
                if (!("title".equalsIgnoreCase(nodeName) || "base"
                        .equalsIgnoreCase(nodeName))) {
                    return child;
                }
            }
        }
        return null;
    }

    /**
     * Parses the stream and writes the processed document to the output.
     *
     * @see #parseHtmlByTidy(Object, Writer)
     */
    public void parseHtml(InputStream input, Writer output) throws IOException {
        this.parseHtmlByTidy(input, output);
    }

    /**
     * Parses the reader content and writes the processed document to the
     * output.
     *
     * @see #parseHtmlByTidy(Object, Writer)
     */
    public void parseHtml(Reader input, Writer output) throws IOException {
        this.parseHtmlByTidy(input, output);
    }

    /**
     * Receives a message from Tidy and logs it at debug level.
     *
     * @param message message reported by Tidy
     */
    public void messageReceived(TidyMessage message) {
        // TODO record messages for output.
        // TODO change signature for receive current node and append message in
        // tree.
        if (log.isDebugEnabled()) {
            log.debug(Messages.getMessage(Messages.MESSAGE_PARSING_INFO,
                    message.getMessage()));
        }
        // TODO - configurable output for errors in page.
        if (INSERT_MESSAGES_INTO_TREE) {
            Lexer lexer = message.getLexer();
            Node element = message.getElement();
            if (null == element) {
                element = lexer.getLastNode();
            } else if ("style".equalsIgnoreCase(element.getElement())
                    || "script".equalsIgnoreCase(element.getElement())) {
                element = lexer.getLastNode();
            }
            // Insert comment about error.
            if (null != element) {
                String messageText = message.getMessage();
                // NOTE(review): uses the platform default charset - confirm
                // which encoding the lexer expects before enabling this code.
                byte[] msg = (TIDY_MARK + messageText + TIDY_MARK).getBytes();
                Node comment = lexer.newNode(Node.COMMENT_TAG, msg, 0,
                        msg.length);
                // TODO - detect style or script elements - not allow comment in
                // it.
                element.insertNodeAtEnd(comment);
            }
        }
    }

    /**
     * 'null' writer for discard errors - since processed in message listener
     *
     * @author shura (latest modification by $Author: alexsmirnov $)
     * @version $Revision: 1.1.2.1 $ $Date: 2007/01/09 18:59:17 $
     *
     */
    private static class ErrorWriter extends Writer {

        @Override
        public void write(char[] cbuf, int off, int len) throws IOException {
            // do nothing
        }

        @Override
        public void flush() throws IOException {
            // do nothing
        }

        @Override
        public void close() throws IOException {
            // do nothing
        }
    }

    /**
     * Sets the encoding used by Tidy to read the input. A null value is
     * ignored and keeps the current setting.
     *
     * @param encoding input encoding name, or null
     */
    public void setInputEncoding(String encoding) {
        if (null != encoding) {
            this._encoding = encoding;
            this.tidy.setInputEncoding(encoding);
        }
    }

    /**
     * Sets the encoding used by Tidy to print the output. A null value is
     * ignored and keeps the current setting.
     *
     * @param encoding output encoding name, or null
     */
    public void setOutputEncoding(String encoding) {
        if (null != encoding) {
            this._outputEncoding = encoding;
            this.tidy.setOutputEncoding(encoding);
        }
    }

    /**
     * Sets up the Tidy output for the given mime type. The type is matched by
     * prefix against the HTML, XHTML and XML type lists, in that order.
     *
     * @param mimeType
     *            mime type of the response, may be null
     * @return {@code true} if the output was configured, {@code false} if the
     *         type is null or not supported by this parser
     */
    public boolean setMime(String mimeType) {
        if (null == mimeType) {
            return false;
        }
        if (startsWithAny(mimeType, _htmlTypes)) {
            // setup html output
            tidy.setXHTML(false);
            tidy.setXmlOut(false);
            if (log.isDebugEnabled()) {
                log.debug("Print output as ordinary HTML");
            }
            return true;
        }
        if (startsWithAny(mimeType, _xhtmlTypes)) {
            // setup xhtml output
            tidy.setXHTML(true);
            tidy.setXmlOut(false);
            if (log.isDebugEnabled()) {
                log.debug("Print output as XHTML");
            }
            return true;
        }
        if (startsWithAny(mimeType, _xmlTypes)) {
            // setup xml output
            tidy.setXHTML(true);
            tidy.setXmlOut(true);
            tidy.setXmlPi(true);
            tidy.setEscapeCdata(false);
            tidy.setNumEntities(true);
            if (log.isDebugEnabled()) {
                log.debug("Print output as XML");
            }
            return true;
        }
        return false;
    }

    /** Tells whether the value starts with at least one of the given prefixes. */
    private static boolean startsWithAny(String value, String[] prefixes) {
        for (int i = 0; i < prefixes.length; i++) {
            if (value.startsWith(prefixes[i])) {
                return true;
            }
        }
        return false;
    }

    /**
     * Passes the "move elements" flag to Tidy.
     *
     * @param move value forwarded to the Tidy configuration
     */
    public void setMoveElements(boolean move) {
        tidy.setMoveElements(move);
    }

    /**
     * Configures the doctype handling. The argument is currently ignored: the
     * Tidy doctype option is always set to "omit".
     *
     * @param doctype requested doctype (not used)
     */
    public void setDoctype(String doctype) {
        tidy.setDocType("omit");
    }

    /**
     * Sets the view state that replaces the state markers in parsed documents.
     *
     * @param viewState
     *            serialized state, or null to only unwrap the markers
     */
    public void setViewState(String viewState) {
        _viewState = viewState;
    }

    /**
     * Sets the events replayed into the document head on parsing.
     *
     * @param headEvents
     *            start element, text and end element events; may be null
     */
    public void setHeadEvents(Object[] headEvents) {
        this.headEvents = headEvents;
    }
}