/*
* JBoss, Home of Professional Open Source
* Copyright 2005, JBoss Inc., and individual contributors as indicated
* by the @authors tag. See the copyright.txt in the distribution for a
* full listing of individual contributors.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.jbpm.pvm.internal.xml;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.jbpm.internal.log.Log;
import org.jbpm.pvm.internal.stream.StreamInput;
import org.jbpm.pvm.internal.util.UrlEntity;
import org.jbpm.pvm.internal.util.XmlUtil;
import org.jbpm.pvm.internal.wire.Descriptor;
import org.jbpm.pvm.internal.wire.descriptor.ArgDescriptor;
import org.jbpm.pvm.internal.wire.xml.WireParser;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;
/** makes typical usage of JAXP more convenient, adds a binding framework,
* entity resolution and error handling.
*
* <h2>Purpose</h2>
* <p>This is a base parser for the common pattern where first JAXP is used
* to parse xml into a Document Object Model (DOM), and then, this DOM is
* examined to build a domain model object. The main purpose of this parser
* is to serve as a base class for implementing such parsers and to provide
* a more convenient API for working with JAXP.
* </p>
*
* <p>A {@link Parser} is a thread safe object. For each parse operation, a
* new {@link Parse} object is created with method {@link #createParse()}.
* Then the parse object is used to specify the input source, execute the
* parse operation and extract the results.
* </p>
*
* <p>{@link Binding}s capture parsing of a certain element type. This way,
* the parser becomes more modular and customizable.
* </p>
*
* <p>{@link Entity Entities} are schemas that specify the grammar of the
* XML files that are parsed by the parser.
* </p>
*
* <h2>API Usage</h2>
* <p>Parsers can be customized by inheritance (that will be covered below),
* but a parser can also be used as is:
* </p>
*
* <pre><i> 1 </i>| static Parser parser = new Parser();
*<i> 2 </i>|
*<i> 3 </i>| void someMethod() {
*<i> 4 </i>| MyDomainObject mdo = (MyDomainObject) parser
*<i> 5 </i>| .createParse()
*<i> 6 </i>| .setString(myXmlString)
*<i> 7 </i>| .execute()
*<i> 8 </i>| .checkProblems()
*<i> 9 </i>| .getDocumentObject();
*<i>10 </i>| }
* </pre>
*
* <p><b>line 1</b> shows that a single parser can be used for all threads as
* the parser is maintained in a static member field.
* </p>
*
* <p><b>line 5</b> shows that a new parse operation is always started with
* the {@link #createParse()} operation. The {@link Parse} object that is
* returned will maintain all data that is related to that single parse
* operation.
* </p>
*
* <p><b>line 6</b> shows how a simple XML string can be provided as the input
* source for the parse operation. Alternative methods to specify the input
* source are {@link Parse#setFile(java.io.File)},
* {@link Parse#setInputStream(java.io.InputStream)},
* {@link Parse#setInputSource(InputSource)},
* {@link Parse#setUrl(java.net.URL)} and
* {@link Parse#setStreamSource(StreamInput)}.
* </p>
*
* <p><b>line 7</b> shows how the execution of the parse is performed. The
* input source will be read, the resulting Document Object Model (DOM) will
* be walked and potentially problems are produced in the parse.
* </p>
*
* <p><b>line 8</b> shows how an exception can be thrown in case of an error.
* The parse execution itself tries to keep parsing as much as possible to
* provide the developer with as much feedback as possible in one parse cycle.
* The {@link Parse#getProblems() problems} are silently captured in the parse
* object. If an exception is thrown by
* {@link Parse#checkErrors(String)}, it will contain a report of
* all the parsing problems. Alternatively, the {@link Parse#getProblems() problems
* in the parse object} could be examined directly without the need for an exception.
* </p>
*
* <p><b>line 9</b> shows how the result of the parse operation is extracted
* from the parse object.
* </p>
*
* <h2 id="binding">Binding</h2>
* <p>Bindings are the link between a certain type of element in your XML document
* and the corresponding java object in your domain model.</p>
*
* <p>A parser can be configured with a set of {@link Binding}s. Each {@link Binding}
* knows how to transform a dom element of a given tagName to the corresponding Java
* object. {@link Bindings} has a notion of binding categories. For example, activities
* and actions can be seen as different categories in jPDL.
* </p>
*
* <p>The purpose of bindings is to make certain elements in the parsing configurable.
* E.g. in jPDL, the main structure of the document is fixed. But activity types can be
* added dynamically.
* </p>
*
* <p>The current {@link Bindings} implementation only supports matching of an
* element with a {@link Binding} based on tagName. If you want to take other things
* into account (e.g. when you want to differentiate between elements of the same
* tagName with a different attribute value), you can create a specialized
* {@link Bindings} class.</p>
*
* <p>Bindings are added by tagName, but they have to be looked up by element. That is
* to support more specialized bindings implementations that match an element with a
* binding on more information than just the tagName. In that case, a specialized subclass of
* {@link Bindings} should be created and the method {@link #getBinding(Element, String)} and
* constructor {@link Bindings#Bindings(Bindings)} should be provided
* with the more specialized matching behaviour.
* </p>
*
* <h2 id="objectstack">Object stack</h2>
* <p>When implementing {@link Binding}s, you might want to make use of the
* contextual object stack that is provided on the {@link Parse}. The
* {@link Binding} implementations can maintain Java objects on that stack
* that are being created.
* </p>
*
* <p>E.g. you could push the ProcessDefinition element onto the object stack while it
* is being parsed like this:
* </p>
*
* <pre>public class MyProcessBinding implements Binding {
*
* public Object parse(Element element, Parse parse, Parser parser) {
* <i>// instantiate the object for this binding</i>
* MyProcess myProcess = new MyProcess();
*
* <i>// collect all the child elements of element</i>
* List<Element> elements = XmlUtil.elements(element);
*
* <i>// push my processDefinition onto the object stack</i>
* parse.pushObject(myProcess);
* try {
*
* for (Element activityElement: elements) {
* // parse the child elements with the bindings in category "activity"
* parseElement(activityElement, parse, "activity");
* }
* } finally {
* // make sure my processDefinition is popped.
* parse.popObject();
* }
* return myProcess;
* }
* }
* </pre>
*
* <p>Then, activity bindings might access the processDefinition like this:
* </p>
*
* <pre>public class MyNodeBinding implements Binding {
*
* public Object parse(Element element, Parse parse, Parser parser) {
* <i>// instantiate the object for this binding</i>
* MyNode myNode = new MyNode();
*
* <i>// add the activity to the processDefinition</i>
* MyProcess myProcess = parse.findObject(MyProcess.class);
* myProcess.addNode(myNode);
* myNode.setMyProcess(myProcess);
*
* return myNode;
* }
* }
* </pre>
*
* <p>A parser implementation will typically have a static Bindings object that
* is leveraged in all parser objects. To customize bindings for such a parser,
* be sure to make a deep copy with {@link Bindings#Bindings(Bindings)} before
* you start adding more bindings to the specialized parser. Otherwise the
* base parser's bindings will be updated as well.
* </p>
*
* <h2 id="buildingcustomparsers">Building custom parsers</h2>
*
* <p>This parser is built for inheritance.
* Overriding method {@link #parseDocumentElement(Element, Parse)} can be an easy
* way to start writing your own logic on walking the Document Object Model (DOM).
* Such customizations can still be combined with the usage of
* <a href="#binding">bindings</a>.
* </p>
*
* <h2 id="entityresolving">Entity resolving</h2>
* <p>A parser can be configured with a set of entities with the
* {@link #addEntity(String, Entity)} method. The {@link UrlEntity} has
* a convenience method to build entities from resources
* {@link UrlEntity#UrlEntity(String, ClassLoader)}.
* </p>
*
* <p>When a document builder is created, the default implementation of the
* {@link #setEntityResolver(DocumentBuilder)} will set this parser as the entity resolver.
* The implementation method of {@link EntityResolver} ({@link #resolveEntity(String, String)})
* will use the added {@link Entity Entities} to try and find a match based on the
* publicId. If one is found, the {@link Entity} inputSource is returned, otherwise
* the systemId is used.
* </p>
*
* <p>This class is intended to be used with aggregation as well as inheritance.
* </p>
*
* @author Tom Baeyens
*/
public class Parser {

  private static final Log log = Log.getLog(Parser.class.getName());

  /** JAXP property name that selects the schema language used for validation. */
  private static final String PROPERTY_SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage";

  /** value for {@link #PROPERTY_SCHEMA_LANGUAGE} that selects W3C XML Schema. */
  private static final String SCHEMA_LANGUAGE_XML_SCHEMA = "http://www.w3.org/2001/XMLSchema";

  /** JAXP property name through which the schema locations are handed to the SAX parser. */
  private static final String PROPERTY_SCHEMA_SOURCE = "http://java.sun.com/xml/jaxp/properties/schemaSource";

  /** feature name that is switched on for every XML reader created in {@link #buildDom(Parse)}. */
  private static final String FEATURE_DYNAMIC_VALIDATION = "http://apache.org/xml/features/validation/dynamic";

  /** factory for the SAX parsers that read the xml input in {@link #buildDom(Parse)}. */
  protected SAXParserFactory saxParserFactory;

  /** schema locations (URL strings) collected by {@link #setSchemaResources(List)};
   * null as long as no schema resources have been set. */
  protected String[] schemaResources;

  /** factory for the document builders created in {@link #createDocumentBuilder(Parse)}. */
  protected DocumentBuilderFactory documentBuilderFactory = null;

  /** the handlers for specific element types; may be null. */
  protected Bindings bindings = null;

  /** not read or written by the methods of this class; available to subclasses. */
  protected ClassLoader classLoader = null;

  /** the default parser */
  public Parser() {
    initialize();
  }

  /** creates a new Parser with bindings that can be maintained statically in
   * specialized subclasses of Parser. */
  public Parser(Bindings bindings) {
    initialize();
    this.bindings = bindings;
  }

  /** creates a new Parser with bindings and entities that can be maintained statically
   * in specialized subclasses of Parser.
   *
   * @param bindings the handlers for specific element types.
   * @param entities is ignored by this constructor; it is only kept for
   *   backwards compatibility of the signature.
   * @deprecated entities should be replaced by {@link #setSchemaResources(List)} */
  @Deprecated
  public Parser(Bindings bindings, Map<String, Entity> entities) {
    this(bindings);
  }

  // initialization ///////////////////////////////////////////////////////////

  /** creates the SAX parser factory and the document builder factory.
   * Invoked by every constructor, so overriding implementations run before
   * the subclass constructor body has been executed. */
  public void initialize() {
    initializeSaxParserFactory();
    initializeDocumentBuilderFactory();
  }

  /** creates a new namespace aware {@link DocumentBuilderFactory}. */
  public void initializeDocumentBuilderFactory() {
    documentBuilderFactory = DocumentBuilderFactory.newInstance();
    documentBuilderFactory.setNamespaceAware(true);
  }

  /** creates a new namespace aware {@link SAXParserFactory}.
   * NOTE(review): apart from namespace awareness the factory keeps its default
   * settings; if untrusted xml is parsed, consider restricting DTDs and
   * external entities here. */
  public void initializeSaxParserFactory() {
    saxParserFactory = SAXParserFactory.newInstance();
    saxParserFactory.setNamespaceAware(true);
  }

  // document builder methods /////////////////////////////////////////////////

  /** customizable creation of a new document builder. Used by
   * {@link #buildDom(Parse)}.
   *
   * The created builder is stored in the parse and gets the parse as its
   * error handler.
   *
   * @return the new document builder, or null if it could not be created. In
   *   that case a problem has been added to the parse. */
  protected DocumentBuilder createDocumentBuilder(Parse parse) {
    try {
      parse.documentBuilder = documentBuilderFactory.newDocumentBuilder();
    } catch (Exception e) {
      parse.addProblem("couldn't get new document builder", e);
      return null;
    }
    parse.documentBuilder.setErrorHandler(parse);
    return parse.documentBuilder;
  }

  // schema validation ////////////////////////////////////////////////////////

  /** switches on validation and resolves the given schema resources to
   * schema locations that will be passed to the SAX parser in
   * {@link #buildDom(Parse)}.
   *
   * Resources are looked up with the context class loader of the current
   * thread. If there is no context class loader, the class loader of this
   * class is used. Resources that cannot be found are skipped.
   *
   * @param resources the names of the schema resources; must not be null. */
  public void setSchemaResources(List<String> resources) {
    saxParserFactory.setValidating(true);
    saxParserFactory.setNamespaceAware(true);

    // the local is deliberately not called classLoader: that would shadow the member field
    ClassLoader resourceLoader = Thread.currentThread().getContextClassLoader();
    if (resourceLoader==null) {
      // the context class loader is optional; don't fail with a NullPointerException
      resourceLoader = Parser.class.getClassLoader();
    }

    List<String> schemaLocations = new ArrayList<String>(resources.size());
    for (String schemaResource: resources) {
      URL schemaUrl = resourceLoader.getResource(schemaResource);
      if (schemaUrl!=null) {
        String schemaLocation = schemaUrl.toString();
        log.trace("schema resource found: " + schemaResource);
        schemaLocations.add(schemaLocation);
      } else {
        log.debug("skipping unavailble schema resource: " + schemaResource);
      }
    }
    schemaResources = schemaLocations.toArray(new String[schemaLocations.size()]);
  }

  // bindings /////////////////////////////////////////////////////////////////

  /** the handlers for specific element types */
  public Bindings getBindings() {
    return bindings;
  }

  /** set the handlers for specific element types */
  public void setBindings(Bindings bindings) {
    this.bindings = bindings;
  }

  /** the handler for the given element */
  public Binding getBinding(Element element) {
    return getBinding(element, null);
  }

  /** the handler for the given element limited to a given category.
   *
   * @return the binding found by the {@link Bindings}, or null if this parser
   *   has no bindings. */
  public Binding getBinding(Element element, String category) {
    return (bindings!=null ? bindings.getBinding(element, category) : null);
  }

  // runtime parsing methods //////////////////////////////////////////////////

  /** main method to start a new parse, check {@link Parse} for specifying
   * input, executing the parse and extracting the results. */
  public Parse createParse() {
    return new Parse(this);
  }

  /** builds a dom from the importedStreamSource and appends the child elements
   * of the document element to the destination element. Problems are reported
   * in the importingParse.
   *
   * NOTE(review): problems that are collected in the internal parse of the
   * imported document are not copied to the importingParse; only exceptions
   * thrown during the import are reported. */
  public void importStream(StreamInput importedStreamInput, Element destination, Parse importingParse) {
    Parse importedParse = null;
    try {
      // build the dom of the imported document
      importedParse = createParse();
      importedParse.setStreamSource(importedStreamInput);
      Document importedDocument = buildDom(importedParse);

      // loop over all the imported document elements
      Element importedDocumentElement = importedDocument.getDocumentElement();
      for (Element importedElement : XmlUtil.elements(importedDocumentElement)) {
        // import the element into the destination element
        destination.appendChild(destination.getOwnerDocument().importNode(importedElement, true));
      }

    } catch (Exception e) {
      importingParse.addProblem("couldn't import "+importedStreamInput, e);

    } finally {
      // buildDom is used directly here, so the clean up that execute does
      // for a normal parse has to be done explicitly for the imported parse
      if (importedParse!=null) {
        closeInputStream(importedParse, importingParse);
      }
    }
  }

  /** customizable parse execution.
   *
   * Builds the dom unless the parse already has a document, walks the dom with
   * {@link #parseDocument(Document, Parse)} and finally closes the input
   * stream of the parse. Exceptions during the dom walk are added to the
   * parse as problems. */
  protected void execute(Parse parse) {
    try {
      if (parse.document==null) {
        parse.document = buildDom(parse);
      }

      // buildDom returns null when no document could be created
      if (parse.document!=null) {
        try {
          // walk the dom tree
          parseDocument(parse.document, parse);
        } catch (Exception e) {
          parse.addProblem("parsing exception: "+e.getMessage(), e);
        }
      }

    } finally {
      closeInputStream(parse, parse);
    }
  }

  /** reads the input source of the parse with a SAX parser and builds a
   * {@link Document} from the SAX events.
   *
   * Failing to configure the schema language, the schema source or the
   * dynamic validation feature is only logged and doesn't stop the parse.
   * All other exceptions are added to the parse as a problem.
   *
   * @return the document, which might be incomplete when the parse failed, or
   *   null if the failure occurred before the document was created. */
  protected Document buildDom(Parse parse) {
    Document document = null;

    try {
      SAXParser saxParser = saxParserFactory.newSAXParser();
      XMLReader xmlReader = saxParser.getXMLReader();

      try {
        saxParser.setProperty(PROPERTY_SCHEMA_LANGUAGE, SCHEMA_LANGUAGE_XML_SCHEMA);
      } catch (Exception e){
        log.info("couldn't set schema language property", e);
      }

      if (schemaResources!=null) {
        try {
          saxParser.setProperty(PROPERTY_SCHEMA_SOURCE, schemaResources);
        } catch (Exception e){
          log.info("couldn't set schema source property", e);
        }
      }

      try {
        xmlReader.setFeature(FEATURE_DYNAMIC_VALIDATION, true);
      } catch (Exception e){
        log.info("couldn't set dynamic validation feature", e);
      }

      DocumentBuilder documentBuilder = createDocumentBuilder(parse);
      if (documentBuilder==null) {
        // fail with a descriptive cause instead of an anonymous NullPointerException;
        // the catch block below still records the problem in the parse
        throw new IllegalStateException("no document builder available");
      }
      document = documentBuilder.getDOMImplementation().createDocument(null, null, null);
      parse.setDocument(document);

      // the dom builder receives the SAX events and fills the empty document
      DomBuilder domBuilder = new DomBuilder();
      domBuilder.setDocument(document);

      xmlReader.setContentHandler(domBuilder);
      xmlReader.setErrorHandler(parse);

      InputSource inputSource = parse.getInputSource();
      xmlReader.parse(inputSource);

    } catch (Exception e) {
      parse.addProblem("couldn't parse xml document", e);
    }

    return document;
  }

  /** closes the input stream of the given parse, if it has one.
   *
   * @param parse the parse that owns the input stream.
   * @param problemSink the parse that receives a problem when closing fails. */
  private void closeInputStream(Parse parse, Parse problemSink) {
    if (parse.inputStream!=null) {
      try {
        parse.inputStream.close();
      } catch (Exception e) {
        problemSink.addProblem("couldn't close input stream", e);
      }
    }
  }

  // Document Object Model walking ////////////////////////////////////////////

  /** start of the DOM walk.
   *
   * This method is used as part of
   * {@link #execute(Parse) the parse execution}.
   *
   * This default implementation behaviour extracts the document element and
   * delegates to {@link #parseDocumentElement(Element, Parse)}. The result
   * is stored as the document object of the parse.
   *
   * This method can be overridden for customized behaviour.
   *
   * @return the object that is the result from parsing this document. */
  public Object parseDocument(Document document, Parse parse) {
    Object object = parseDocumentElement(document.getDocumentElement(), parse);
    parse.documentObject = object;
    return object;
  }

  /** parses the top level element in the document and produces the object that
   * is the result from the parsing.
   *
   * @return the object that is the result from parsing this document element. */
  public Object parseDocumentElement(Element documentElement, Parse parse) {
    return parseElement(documentElement, parse);
  }

  /** parses an arbitrary element in the document with the first matching
   * binding found using any of the categories.
   *
   * @return the object that is the result from parsing this element. */
  public Object parseElement(Element element, Parse parse) {
    return parseElement(element, parse, null);
  }

  /** parses an arbitrary element in the document based on the bindings in the
   * given category.
   *
   * @param category is the category in which the tagName should be resolved to
   *   a {@link Binding}. If category is null, all the categories will be
   *   scanned for an appropriate binding in random order.
   *
   * @return the object that is the result from parsing this element, or null
   *   if no binding was found for the element. */
  public Object parseElement(Element element, Parse parse, String category) {
    Object object = null;
    String tagName = XmlUtil.getTagLocalName(element);

    Binding binding = getBinding(element, category);

    if (binding!=null) {
      object = binding.parse(element, parse, this);
    } else if (log.isDebugEnabled()) {
      log.debug("no element parser for tag "+tagName+(category!=null ? " in category "+category : " in the default category"));
    }

    return object;
  }

  /** parses the given arg elements with the bindings in category
   * {@link WireParser#CATEGORY_DESCRIPTOR}.
   *
   * @see #parseArgs(List, Parse, String) */
  public List<ArgDescriptor> parseArgs(List<Element> argElements, Parse parse) {
    return parseArgs(argElements, parse, WireParser.CATEGORY_DESCRIPTOR);
  }

  /** parses a list of arg elements into {@link ArgDescriptor}s.
   *
   * For each arg element, the attribute <code>type</code> becomes the type
   * name and the contained element is parsed into the descriptor with the
   * bindings of the given category. An arg element without a contained
   * element results in a problem in the parse and an arg descriptor without
   * descriptor.
   *
   * @return one arg descriptor for each arg element, or null if argElements
   *   is null or empty. */
  public List<ArgDescriptor> parseArgs(List<Element> argElements, Parse parse, String category) {
    if ( (argElements==null) || (argElements.isEmpty()) ) {
      // callers get null, not an empty list, when there are no args
      return null;
    }

    List<ArgDescriptor> args = new ArrayList<ArgDescriptor>(argElements.size());
    for (Element argElement: argElements) {
      ArgDescriptor argDescriptor = new ArgDescriptor();
      argDescriptor.setTypeName(XmlUtil.attribute(argElement, "type"));
      Element descriptorElement = XmlUtil.element(argElement);
      if (descriptorElement==null) {
        // bindings is optional (see getBinding), so reporting this problem must not fail without bindings
        Object tagNames = null;
        if (bindings!=null) {
          tagNames = bindings.getTagNames(category);
        }
        parse.addProblem("arg must contain exactly one descriptor element out of "+tagNames+" as contents:"+XmlUtil.toString((Element) argElement.getParentNode()), argElement);
      } else {
        Descriptor descriptor = (Descriptor) parseElement(descriptorElement, parse, category);
        argDescriptor.setDescriptor(descriptor);
      }
      args.add(argDescriptor);
    }
    return args;
  }
}