/*
============================================================================
The Apache Software License, Version 1.1
============================================================================
Copyright (C) 1999-2003 The Apache Software Foundation. All rights reserved.
Redistribution and use in source and binary forms, with or without modifica-
tion, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. The end-user documentation included with the redistribution, if any, must
include the following acknowledgment: "This product includes software
developed by the Apache Software Foundation (http://www.apache.org/)."
Alternately, this acknowledgment may appear in the software itself, if
and wherever such third-party acknowledgments normally appear.
4. The names "Apache Cocoon" and "Apache Software Foundation" must not be
used to endorse or promote products derived from this software without
prior written permission. For written permission, please contact
apache@apache.org.
5. Products derived from this software may not be called "Apache", nor may
"Apache" appear in their name, without prior written permission of the
Apache Software Foundation.
THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
APACHE SOFTWARE FOUNDATION OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLU-
DING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
This software consists of voluntary contributions made by many individuals
on behalf of the Apache Software Foundation and was originally created by
Stefano Mazzocchi <stefano@apache.org>. For more information on the Apache
Software Foundation, please see <http://www.apache.org/>.
*/
package org.apache.cocoon.generation;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import org.apache.avalon.framework.logger.Logger;
import org.apache.avalon.framework.parameters.Parameters;
import org.apache.avalon.framework.service.ServiceException;
import org.apache.avalon.framework.service.ServiceManager;
import org.apache.cocoon.ProcessingException;
import org.apache.cocoon.environment.Context;
import org.apache.cocoon.environment.ObjectModelHelper;
import org.apache.cocoon.environment.SourceResolver;
import org.apache.cocoon.xml.dom.DOMStreamer;
import org.apache.excalibur.source.TraversableSource;
import org.apache.excalibur.xml.dom.DOMParser;
import org.apache.excalibur.xml.xpath.PrefixResolver;
import org.apache.excalibur.xml.xpath.XPathProcessor;
import org.apache.regexp.RE;
import org.apache.regexp.RESyntaxException;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
/**
* Generates an XML collection listing performing XPath queries on XML sources.
* It can be used either as a plain TraversableGenerator or, if an XPath is
* specified, it will perform an XPath query on every XML resource, where "xml
* resource" is, by default, any resource ending with ".xml", which can be
* overridden by setting the (regexp) pattern "xmlFiles" as a sitemap parameter,
* or where the name of the resource has a container-wide mime-type mapping to
* 'text/xml' such as specified by mime-mapping elements in a web.xml
* descriptor file.
*
* The XPath can be specified in two ways:
* <ol>
* <li>By using an XPointerish syntax in the URL: everything following the
* pound sign (possibly preceding query
* string arguments) will be treated as the XPath;
* </li>
* <li>Specifying it as a sitemap parameter named "xpath".</li>
* </ol>
*
* Sample usage:
*
* Sitemap:
* <map:match pattern="documents/**">
* <map:generate type="xpathdirectory"
* src="docs/{1}#/article/title|/article/abstract">
* <map:parameter name="xmlFiles" value="\.xml$"/>
* </map:generate>
* <map:serialize type="xml"/> </map:match>
*
* Request:
* http://www.some.host/documents/test
* Result:
* <collection:collection
* name="test" lastModified="1010400942000"
* date="1/7/02 11:55 AM" requested="true"
* xmlns:collection="http://apache.org/cocoon/collection/1.0">
* <collection:collection name="subdirectory" lastModified="1010400942000" date="1/7/02 11:55 AM" />
* <collection:resource name="test.xml" lastModified="1011011579000" date="1/14/02 1:32 PM">
* <collection:xpath docid="test.xml" query="/article/title">
* <title>This is a test document</title>
* <abstract>
* <para>Abstract of my test article</para>
* </abstract>
* </collection:xpath>
* </collection:resource>
* <collection:resource name="test.gif" lastModified="1011011579000" date="1/14/02 1:32 PM"/>
* </collection:collection>
*
* If you need to use namespaces, you can set them as sitemap parameters in
* the form:
* &lt;map:parameter name="xmlns:<i>your prefix</i>" value="nsURI"/&gt;
*
* @author <a href="mailto:gianugo@apache.org">Gianugo Rabellino</a>
* @author <a href="mailto:d.madama@pro-netics.com">Daniele Madama</a>
* @version CVS $Id: XPathTraversableGenerator.java,v 1.3 2003/10/28 14:08:50 unico Exp $
*/
public class XPathTraversableGenerator extends TraversableGenerator {
/** Local name of the element that wraps the XML snippet selected by the XPath query. */
protected static final String XPATH_NODE_NAME = "xpath";
/** Name of the attribute, on the wrapper element, that carries the XPath query. */
protected static final String QUERY_ATTR_NAME = "query";
/** Name of the attribute, on the wrapper element, that carries the name of the queried document. */
protected static final String RESULT_DOCID_ATTR = "docid";
/**
 * The regular expression matched against resource names to decide whether a
 * resource is XML; compiled in setup() from the "xmlFiles" parameter.
 */
protected RE xmlRE = null;
/**
 * The document most recently parsed by performXPathQuery(); stays null when
 * parsing failed.
 */
protected Document doc = null;
/**
 * The XPath to apply, taken in setup() from the part of the source after '#'
 * or from the "xpath" parameter; null when none was given.
 */
protected String xpath = null;
/** The XPath processor, looked up in service() and released in dispose(). */
protected XPathProcessor processor = null;
/** The DOM parser for the XML resources to be queried, looked up in service() and released in dispose(). */
protected DOMParser parser = null;
/** The prefix resolver for namespaced queries, filled in setup() from the "xmlns:*" parameters. */
protected XPathPrefixResolver prefixResolver;
/** The cocoon context used for mime-type mappings in isXML(). */
protected Context context;
/**
 * Prepares the generator for one request.
 *
 * The XPath is taken from the source string when it contains a '#'
 * (everything after the '#', up to a following '?' if there is one);
 * otherwise from the "xpath" parameter, which may be absent. The XPath is
 * removed from the source, while a query string that follows it is kept.
 * The "xmlFiles" parameter (default "\.xml$") is compiled into the pattern
 * used to recognise XML resources, and every parameter whose name starts
 * with "xmlns:" is registered as a namespace prefix for the XPath query.
 *
 * @param resolver the source resolver, handed to the superclass
 * @param objectModel the object model, from which the context is obtained
 * @param src the source, optionally followed by "#xpath" and "?query"
 * @param par the sitemap parameters ("xpath", "xmlFiles", "xmlns:*")
 * @throws ProcessingException if the "xmlFiles" pattern is not a valid
 *         regular expression
 * @throws SAXException declared for the superclass setup
 * @throws IOException declared for the superclass setup
 */
public void setup(SourceResolver resolver, Map objectModel, String src, Parameters par)
throws ProcessingException, SAXException, IOException {
    super.setup(resolver, objectModel, src, par);

    // Work on one snapshot so that every index and substring refers to the
    // same string.
    final String location = this.source;

    // See if an XPath was specified
    final int pointer = location.indexOf('#');
    if (pointer != -1) {
        // Only a '?' located after the '#' terminates the XPath. Searching
        // from the start would yield an end index smaller than the start
        // index whenever a '?' precedes the '#', making substring() throw.
        final int endpointer = location.indexOf('?', pointer);
        if (endpointer != -1) {
            this.xpath = location.substring(pointer + 1, endpointer);
            this.source = location.substring(0, pointer) + location.substring(endpointer);
        } else {
            this.xpath = location.substring(pointer + 1);
            this.source = location.substring(0, pointer);
        }
    } else {
        this.xpath = par.getParameter("xpath", null);
    }
    this.cacheKeyParList.add(this.xpath);
    if (this.getLogger().isDebugEnabled()) {
        this.getLogger().debug("Applying XPath: " + xpath
            + " to collection " + source);
    }

    String xmlFilesPattern = null;
    try {
        xmlFilesPattern = par.getParameter("xmlFiles", "\\.xml$");
        this.cacheKeyParList.add(xmlFilesPattern);
        this.xmlRE = new RE(xmlFilesPattern);
        if (this.getLogger().isDebugEnabled()) {
            this.getLogger().debug("pattern for XML files: " + xmlFilesPattern);
        }
    } catch (RESyntaxException rese) {
        throw new ProcessingException("Syntax error in regexp pattern '"
            + xmlFilesPattern + "'", rese);
    }

    // Register every "xmlns:<prefix>" parameter as a prefix -> namespace
    // URI mapping for the XPath query.
    String[] params = par.getNames();
    this.prefixResolver = new XPathPrefixResolver(this.getLogger());
    for (int i = 0; i < params.length; i++) {
        if (params[i].startsWith("xmlns:")) {
            String paramValue = par.getParameter(params[i], "");
            String paramName = params[i].substring(6);
            if (getLogger().isDebugEnabled()) {
                getLogger().debug("add param to prefixResolver: " + paramName);
            }
            this.prefixResolver.addPrefix(paramName, paramValue);
        }
    }

    this.context = ObjectModelHelper.getContext(objectModel);
}
/**
 * Hands the service manager to the superclass, then looks up the XPath
 * processor and the DOM parser used for the queries.
 *
 * @param manager the service manager to look the components up in
 * @throws ServiceException if the superclass or one of the lookups fails
 */
public void service(ServiceManager manager) throws ServiceException {
    super.service(manager);

    this.processor = (XPathProcessor) manager.lookup(XPathProcessor.ROLE);
    this.parser = (DOMParser) manager.lookup(DOMParser.ROLE);
}
/**
 * Releases the XPath processor and the DOM parser back to the service
 * manager, if there is one, and then disposes the superclass.
 */
public void dispose() {
    if (this.manager == null) {
        super.dispose();
        return;
    }

    this.manager.release(this.processor);
    this.manager.release(this.parser);
    this.processor = null;
    this.parser = null;

    super.dispose();
}
/**
 * Adds the superclass content for the source and, when the source is a
 * non-collection XML resource and an XPath is set, the result of the
 * XPath query on it.
 *
 * @param source the source whose content is added
 * @throws SAXException if adding the content fails
 * @throws ProcessingException if the superclass fails to add its content
 */
protected void addContent(TraversableSource source) throws SAXException, ProcessingException {
    super.addContent(source);

    if (source.isCollection()) {
        return;
    }
    if (!isXML(source)) {
        return;
    }
    if (this.xpath == null) {
        return;
    }
    performXPathQuery(source);
}
/**
 * Tells whether a given TraversableSource shall be handled as XML: either
 * its name matches the XML files pattern, or the context maps its name to
 * the mime-type "text/xml" (compared ignoring case).
 *
 * @param path the TraversableSource to check
 * @return true if the given TraversableSource shall be handled as XML,
 *         false otherwise.
 */
protected boolean isXML(TraversableSource path) {
    final String resourceName = path.getName();
    final String mimeType = this.context.getMimeType(resourceName);
    if (this.xmlRE.match(resourceName)) {
        return true;
    }
    return "text/xml".equalsIgnoreCase(mimeType);
}
/**
 * Performs the XPath query on the source and streams the selected nodes,
 * wrapped in an "xpath" element carrying the document name ("docid") and
 * the query ("query") as attributes.
 *
 * A source that cannot be read or parsed is logged and skipped: nothing is
 * emitted for it and no exception is thrown.
 *
 * @param in the Source the XPath is performed on.
 * @throws SAXException if something goes wrong while adding the XML snippet.
 */
protected void performXPathQuery(TraversableSource in)
throws SAXException {
    this.doc = null;
    InputStream stream = null;
    try {
        stream = in.getInputStream();
        this.doc = this.parser.parseDocument(new InputSource(stream));
    } catch (SAXException se) {
        // Best effort: an invalid document is reported and ignored.
        this.getLogger().error("Warning:" + in.getName()
            + " is not a valid XML document. Ignoring", se);
    } catch (Exception e) {
        this.getLogger().error("Unable to resolve and parse document " + in.getName(), e);
    } finally {
        // The stream was opened here, so it is closed here whatever the
        // parser did with it.
        if (stream != null) {
            try {
                stream.close();
            } catch (IOException ioe) {
                if (this.getLogger().isDebugEnabled()) {
                    this.getLogger().debug("Unable to close the stream of " + in.getName(), ioe);
                }
            }
        }
    }

    if (this.doc == null) {
        return;
    }

    NodeList nl = this.processor.selectNodeList(this.doc.getDocumentElement(), this.xpath, this.prefixResolver);
    final String id = in.getName();
    AttributesImpl attributes = new AttributesImpl();
    // Both attributes use the plain "CDATA" type.
    attributes.addAttribute("", RESULT_DOCID_ATTR, RESULT_DOCID_ATTR,
        "CDATA", id);
    attributes.addAttribute("", QUERY_ATTR_NAME, QUERY_ATTR_NAME, "CDATA",
        this.xpath);

    final String qName = PREFIX + ":" + XPATH_NODE_NAME;
    super.contentHandler.startElement(URI, XPATH_NODE_NAME, qName, attributes);
    DOMStreamer ds = new DOMStreamer(super.xmlConsumer);
    for (int i = 0; i < nl.getLength(); i++) {
        ds.stream(nl.item(i));
    }
    super.contentHandler.endElement(URI, XPATH_NODE_NAME, qName);
}
/**
 * Recycles the superclass, then drops every per-request reference held by
 * this generator so that nothing from the previous request is kept.
 */
public void recycle() {
    super.recycle();

    // Request configuration
    this.xpath = null;
    this.xmlRE = null;
    this.prefixResolver = null;
    this.context = null;

    // Working data
    this.attributes = null;
    this.doc = null;
}
/**
 * A minimal PrefixResolver backed by a map from prefix to namespace URI.
 */
class XPathPrefixResolver implements PrefixResolver {

    /** Registered mappings: prefix to namespace URI. */
    private Map params;

    /** Logger used to trace the lookups at debug level. */
    private Logger logger;

    /**
     * Creates a resolver without any registered prefix.
     *
     * @param logger the logger that receives the debug output
     */
    public XPathPrefixResolver(Logger logger) {
        this.logger = logger;
        this.params = new HashMap();
    }

    /**
     * Get a namespace URI given a prefix.
     *
     * @param prefix the prefix to resolve
     * @return the namespace URI registered for the prefix, or null if the
     *         prefix is not registered
     * @see org.apache.excalibur.xml.xpath.PrefixResolver#prefixToNamespace(java.lang.String)
     */
    public String prefixToNamespace(String prefix) {
        if (this.logger.isDebugEnabled()) {
            this.logger.debug("prefix: " + prefix);
        }

        if (!this.params.containsKey(prefix)) {
            return null;
        }

        final Object namespace = this.params.get(prefix);
        if (this.logger.isDebugEnabled()) {
            this.logger.debug("prefix; " + prefix + " - namespace: " + namespace);
        }
        return (String) namespace;
    }

    /**
     * Registers a namespace URI for a prefix, replacing any earlier mapping
     * of that prefix.
     *
     * @param prefix the prefix
     * @param uri the namespace URI the prefix stands for
     */
    public void addPrefix(String prefix, String uri) {
        this.params.put(prefix, uri);
    }
}
}