/*
* JBoss, Home of Professional Open Source
* Copyright 2005, JBoss Inc., and individual contributors as indicated
* by the @authors tag. See the copyright.txt in the distribution for a
* full listing of individual contributors.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.jbpm.xml;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.jbpm.PvmException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
/**
* XML parser based on JAXP dom parser with simple binding support, error
* handling and entity resolving.
*
* <h2>Purpose</h2>
* <p>This is a base parser for the common pattern where first JAXP is used
* to parse xml into a dom object model, and then, this dom object model is
* examined to build a domain model object.
* </p>
*
* <p>A {@link Parser} is a thread safe object. For each parse, a new {@link Parse}
* object is created that contains all information for that parse operation. The
* return value of the parse methods is the domain object that is built from the
* dom model.
* </p>
*
* <h2>Usage</h2>
* <p>Simplest usage (e.g. in test cases):
* </p>
*
* <pre>
* static Parser parser = new Parser();
*
* void someMethod() {
* MyDomainObject mdo = (MyDomainObject) parser.parseXmlString(myXmlString);
* }
* </pre>
*
* <p>In case you want to collect problems from a parsing operation, use the API
* like this:
* </p>
*
* <pre>
* static Parser parser = new Parser();
*
* void someMethod() {
* Parse parse = new Parse();
* parser.parseXmlString(myXmlString, parse);
*
* if (parse.hasProblems()) {
* List&lt;Problem&gt; problems = parse.getProblems();
* // throw exception, log parse problems or do something else
*
* } else {
* // use the parsed domain object
* MyDomainObject mdo = (MyDomainObject) parse.getDomainObject();
* }
* }
* </pre>
*
* <h2 id="binding">Binding</h2>
* <p>Bindings are the link between a certain type of element in your XML document
* and the corresponding java object in your domain model.</p>
*
* <p>A parser can be configured with a set of {@link Binding}s. Each {@link Binding}
* knows how to transform a dom element of a given tagName to the corresponding Java
* object. {@link Bindings} has a notion of binding categories. For example, nodes
* and actions can be seen as different categories in jPDL.
* </p>
*
* <p>The purpose of bindings is to make certain elements in the parsing configurable.
* E.g. in jPDL, the main structure of the document is fixed. But node types can be
* added dynamically.
* </p>
*
* <p>The current {@link Bindings} implementation only supports matching of an
* element with a {@link Binding} based on tagName. If you want to take other things
* into account (e.g. when you want to differentiate between elements of the same
* tagName with a different attribute value), you can create a specialized
* {@link Bindings} class.</p>
*
* <p>Bindings are added by tagName, but they have to be looked up by element. That is
* to support more specialized bindings implementations that match an element with a
* binding on more information than just the tagName. In that case, a specialized subclass of
* {@link Bindings} should be created and the method {@link Bindings#getBinding(Element, String)} and
* constructor {@link Bindings#Bindings(Bindings)} should be provided
* with the more specialized matching behaviour.
* </p>
*
* <h2 id="objectstack">Object stack</h2>
* <p>When implementing {@link Binding}s, you might want to make use of the
* contextual object stack that is provided on the {@link Parse}. The
* {@link Binding} implementations can maintain Java objects on that stack
* that are being created.
* </p>
*
* <p>E.g. you could push the ProcessDefinition element onto the object stack while it
* is being parsed like this:
* </p>
*
* <pre>public class MyProcessBinding implements Binding {
*
* public Object parse(Element element, Parse parse, Parser parser) {
* <i>// instantiate the object for this binding</i>
* MyProcess myProcess = new MyProcess();
*
* <i>// collect all the child elements of element</i>
* List&lt;Element&gt; elements = XmlUtil.elements(element);
*
* <i>// push my processDefinition onto the object stack</i>
* parse.pushObject(myProcess);
* try {
*
* for (Element nodeElement: elements) {
* // parse the child elements with the bindings in category "node"
* parser.parseElement(nodeElement, parse, "node");
* }
* } finally {
* // make sure my processDefinition is popped.
* parse.popObject();
* }
* return myProcess;
* }
* }
* </pre>
*
* <p>Then, node bindings might access the processDefinition like this:
* </p>
*
* <pre>public class MyNodeBinding implements Binding {
*
* public Object parse(Element element, Parse parse, Parser parser) {
* <i>// instantiate the object for this binding</i>
* MyNode myNode = new MyNode();
*
* <i>// add the node to the processDefinition</i>
* MyProcess myProcess = parse.findObject(MyProcess.class);
* myProcess.addNode(myNode);
* myNode.setMyProcess(myProcess);
*
* return myNode;
* }
* }
* </pre>
*
* <p>A parser implementation will typically have a static Bindings object that
* is leveraged in all parser objects. To customize bindings for such a parser,
* be sure to make a deep copy with {@link Bindings#Bindings(Bindings)} before
* you start adding more bindings to the specialized parser. Otherwise the
* base parser's bindings will be updated as well.
* </p>
*
* <h2>Problems and exceptions</h2>
*
* <p>During a parse, no exceptions are thrown. Instead, problems are added
* to the parse. That way, the user of the parser can decide what to do with
* the problems: throw some exception or just log the problems.
* </p>
*
* <p>Only 2 exceptions are thrown by this xml component:
* <ul>
* <li>{@link #addEntity(String, Entity)} throws a PvmException
* when the resource could not be resolved to a URL.
* </li>
* <li>{@link #parseInputSource(InputSource, Parse)} throws a PvmException when
* parse is null.
* </li>
* </ul>
* The third exception thrown is in the {@link UrlEntity} when the resource URL can't
* be translated into an inputSource. But that exception should be caught in the
* {@link #parseInputSource(InputSource, Parse)} method and translated into a problem
* on the parse.
* </p>
*
* <h2>Reusing {@link Parse}s</h2>
* <p>In situations where you want to parse multiple documents you can reuse
* the same parse object. First, this enables easy collection of all problems in
* a single {@link Parse} object. And second, it also allows for pushing contextual
* objects onto the object stack for all of these parses.
* </p>
*
* <h2 id="entityresolving">Entity resolving</h2>
* <p>A parser can be configured with a set of entities with the
* {@link #addEntity(String, Entity)} method. The {@link UrlEntity} has
* a convenience method to build entities from resources
* {@link UrlEntity#UrlEntity(String, ClassLoader)}.
* </p>
*
* <p>When a document builder is created, the default implementation of the
* {@link #setEntityResolver(DocumentBuilder)} will set this parser as the entity resolver.
* The implementation method of {@link EntityResolver} ({@link #resolveEntity(String, String)})
* will use the added {@link Entity}s to try and find a match based on the
* publicId. If one is found, the {@link Entity} inputSource is returned, otherwise
* the systemId is used.
* </p>
*
* <p>This class is intended to be used with aggregation as well as inheritance.
* </p>
*
* @author Tom Baeyens
*/
public class Parser implements EntityResolver {

  private static final Logger log = Logger.getLogger(Parser.class.getName());

  /** lazily created by {@link #getDocumentBuilderFactory()} unless set explicitly. */
  protected DocumentBuilderFactory documentBuilderFactory = null;

  /** entities by publicId, consulted by {@link #resolveEntity(String, String)}; null until
   * an entity is added or {@link #useParseEntityResolver()} is called. */
  protected Map<String, Entity> entities = null;

  /** bindings used to translate dom elements into domain objects; may be null. */
  protected Bindings bindings = null;

  /** class loader given in the last {@link #parseResource(String, ClassLoader, Parse)} call; may be null. */
  protected ClassLoader classLoader = null;

  /** creates a new Parser without bindings and without entities. */
  public Parser() {
  }

  /** creates a new Parser with bindings that can be maintained statically in
   * specialized subclasses of Parser.
   * @param bindings is stored by reference, not copied.
   */
  public Parser(Bindings bindings) {
    this.bindings = bindings;
  }

  /** creates a new Parser with bindings and entities that can be maintained statically
   * in specialized subclasses of Parser.
   * @param bindings is stored by reference, not copied.
   * @param entities is stored by reference, so {@link #addEntity(String, Entity)} will
   *   update the given map.
   */
  public Parser(Bindings bindings, Map<String, Entity> entities) {
    this.bindings = bindings;
    this.entities = entities;
  }

  // document builder methods /////////////////////////////////////////////////

  /** getter with lazy initialization of the document builder factory.
   * If no document builder factory was set previously with the {@link #setDocumentBuilderFactory(DocumentBuilderFactory)}
   * method, {@link #newDocumentBuilderFactory()} will be called to create one.
   */
  public synchronized DocumentBuilderFactory getDocumentBuilderFactory() {
    if (documentBuilderFactory==null) {
      documentBuilderFactory = newDocumentBuilderFactory();
    }
    return documentBuilderFactory;
  }

  /** setter for the document builder factory. Synchronized on the same monitor as
   * {@link #getDocumentBuilderFactory()} so that a factory set here is visible to
   * the lazy getter. */
  public synchronized void setDocumentBuilderFactory(DocumentBuilderFactory documentBuilderFactory) {
    this.documentBuilderFactory = documentBuilderFactory;
  }

  /** factory method for {@link DocumentBuilderFactory} during lazy initialization
   * of the documentBuilderFactory. Can be overridden by subclasses to change
   * the DocumentBuilderFactory implementation or to apply specific configurations.
   *
   * <p>NOTE(review): the factory is returned with its default settings. Subclasses
   * that parse untrusted documents may want to override this method and restrict
   * DTD and external entity processing.</p> */
  protected DocumentBuilderFactory newDocumentBuilderFactory() {
    return DocumentBuilderFactory.newInstance();
  }

  // entities /////////////////////////////////////////////////////////////////

  /** adds a resolver to the schema catalog.
   * See also <a href="#entityresolving">section 'Entity resolving'</a>.
   * @param publicId is the key that {@link #resolveEntity(String, String)} uses for the lookup.
   * @param entity provides the input source for the given publicId.
   */
  public void addEntity(String publicId, Entity entity) {
    if (entities==null) {
      entities = new HashMap<String, Entity>();
    }
    entities.put(publicId, entity);
  }

  /** makes sure that an {@link EntityResolver} is created based on the {@link Entity}s in this parser.
   * even when none of the {@link #addEntity(String, Entity)} methods are called.
   * This enables addition of entities on a per-{@link Parse} basis when there are no parser-level entities. */
  public void useParseEntityResolver() {
    if (entities==null) {
      entities = new HashMap<String, Entity>();
    }
  }

  // bindings /////////////////////////////////////////////////////////////////

  /** the bindings of this parser, or null if none were configured. */
  public Bindings getBindings() {
    return bindings;
  }

  /** replaces the bindings of this parser. The given object is stored by reference. */
  public void setBindings(Bindings bindings) {
    this.bindings = bindings;
  }

  /** looks up the binding for the given element without specifying a category.
   * Equivalent to {@link #getBinding(Element, String) getBinding(element, null)}. */
  public Binding getBinding(Element element) {
    return getBinding(element, null);
  }

  /** looks up the binding for the given element in the given category.
   * @return the binding found by the {@link Bindings}, or null when this parser has no
   *   bindings.
   */
  public Binding getBinding(Element element, String category) {
    if (bindings==null) {
      return null;
    }
    return bindings.getBinding(element, category);
  }

  // xml to dom parsing methods ///////////////////////////////////////////////

  /** parses the domain object from an inputSource. */
  public Object parseInputSource(InputSource inputSource) {
    return parseInputSource(inputSource, new Parse());
  }

  /** parses the domain object from an inputSource and collects the problems in the given {@link Parse}.
   * All other parse methods end up in this method.
   * @return the object produced by {@link #parseDocument(Document, Parse)}, or null when the
   *   inputSource is null, the document builder could not be created or the xml could not be parsed.
   *   In those cases a problem is added to the parse.
   * @throws PvmException when parse is null.
   */
  public Object parseInputSource(InputSource inputSource, Parse parse) {
    if (parse==null) {
      throw new PvmException("parse is null");
    }
    if (inputSource==null) {
      parse.addProblem("no inputSource");
      return null;
    }

    // create the document builder
    DocumentBuilderFactory factory = getDocumentBuilderFactory();
    DocumentBuilder documentBuilder = null;
    try {
      documentBuilder = factory.newDocumentBuilder();
    } catch (Exception e) {
      parse.addProblem("couldn't get new document builder", e);
      return null;
    }
    setErrorHandler(documentBuilder, parse);
    setEntityResolver(documentBuilder);

    // xml --> dom
    Document document = null;
    try {
      document = documentBuilder.parse(inputSource);
    } catch (Exception e) {
      parse.addProblem("couldn't parse xml document", e);
      return null;
    }
    parse.setDocument(document);

    // dom --> domain object
    Object documentObject = parseDocument(document, parse);
    if (documentObject!=null) {
      // a null result leaves the document object of the parse untouched
      parse.setDocumentObject(documentObject);
    }
    return documentObject;
  }

  /** sets this parser as the entity resolver on the documentBuilder which
   * uses the {@link #entities} to resolve entities.
   * Override to customize the entity resolver.
   */
  protected void setEntityResolver(DocumentBuilder documentBuilder) {
    documentBuilder.setEntityResolver(this);
  }

  /** sets the given parse as the error handler on the documentBuilder.
   * Override to customize the error handler.
   */
  protected void setErrorHandler(DocumentBuilder documentBuilder, Parse parse) {
    documentBuilder.setErrorHandler(parse);
  }

  /** parses the domain object from an xml string. */
  public Object parseXmlString(String xmlString) {
    return parseXmlString(xmlString, new Parse());
  }

  /** parses the domain object from an xml string and collects the problems in the given {@link Parse}.
   * @return the parsed domain object, or null when xmlString is null (a problem is added)
   *   or when {@link #parseInputSource(InputSource, Parse)} returns null.
   */
  public Object parseXmlString(String xmlString, Parse parse) {
    if (xmlString==null) {
      parse.addProblem("couldn't parse null xmlString");
      return null;
    }
    // The string is handed to the parser as characters. Converting it to bytes with
    // the platform default charset first would corrupt non-ascii content whenever
    // that charset differs from the encoding declared in the xml prolog.
    InputSource inputSource = new InputSource(new StringReader(xmlString));
    return parseInputSource(inputSource, parse);
  }

  /** parses the domain object from a resource.
   * See {@link #parseResource(String, ClassLoader, Parse)} for more information on how the
   * resource is loaded. The default value for the classLoader is null.
   */
  public Object parseResource(String resource) {
    return parseResource(resource, null);
  }

  /** parses the domain object from an xml resource and a classLoader.
   * See {@link #parseResource(String, ClassLoader, Parse)} for more information on how the
   * resource is loaded.
   */
  public Object parseResource(String resource, ClassLoader classLoader) {
    return parseResource(resource, classLoader, new Parse());
  }

  /** parses the domain object from an xml resource and a classLoader.
   * The resource is loaded from the classLoader as a stream. This means
   * that resources should be visible to the classloader and they should be
   * referenced relative to the root of the classpath. E.g.
   * <code>org/jbpm/MyResourceFile.xml</code>
   * If the classLoader is null, the class loader returned by {@link #getClassLoader()}
   * is used.
   *
   * <p>The given classLoader is stored in this parser so that {@link #getClassLoader()}
   * returns it. As a consequence, concurrent calls on the same parser with different
   * class loaders overwrite each other's value.</p>
   *
   * <p>The stream that is opened for the resource is closed before this method returns.</p>
   *
   * @return the parsed domain object, or null when the resource is null or can't be
   *   found (a problem is added) or when {@link #parseInputSource(InputSource, Parse)} returns null.
   */
  public Object parseResource(String resource, ClassLoader classLoader, Parse parse) {
    if (resource==null) {
      parse.addProblem("couldn't parse resource null");
      return null;
    }

    this.classLoader = classLoader;
    InputStream inputStream = getClassLoader().getResourceAsStream(resource);
    if (inputStream==null) {
      parse.addProblem("resource "+resource+" doesn't exist");
      return null;
    }

    try {
      InputSource inputSource = new InputSource(inputStream);
      inputSource.setPublicId(resource);
      return parseInputSource(inputSource, parse);
    } finally {
      // this method opened the stream, so it is responsible for releasing it
      close(inputStream, "resource "+resource);
    }
  }

  /** returns the classLoader given in the last {@link #parseResource(String, ClassLoader, Parse)}
   * call if that was not null, otherwise the current context class loader. When the current
   * thread has no context class loader, the class loader of {@link Parser} is returned. */
  public ClassLoader getClassLoader() {
    if (classLoader!=null) {
      return classLoader;
    }
    ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
    if (contextClassLoader!=null) {
      return contextClassLoader;
    }
    return Parser.class.getClassLoader();
  }

  /** parses the domain object from an xml file. */
  public Object parseFile(File file) {
    return parseFile(file, new Parse());
  }

  /** parses the domain object from an xml file and collects the problems in the given {@link Parse}.
   * The file path is used as the publicId and the absolute path as the systemId of the
   * input source. The file is closed before this method returns.
   * @return the parsed domain object, or null when the file is null or can't be opened
   *   (a problem is added) or when {@link #parseInputSource(InputSource, Parse)} returns null.
   */
  public Object parseFile(File file, Parse parse) {
    if (file==null) {
      parse.addProblem("couldn't parse file null");
      return null;
    }

    // A byte stream is used instead of a FileReader: a FileReader decodes with the
    // platform default charset, whereas a byte stream lets the xml parser detect the
    // encoding from the document itself.
    InputStream inputStream = null;
    try {
      inputStream = new FileInputStream(file);
    } catch (FileNotFoundException e) {
      parse.addProblem(file.getAbsolutePath()+" is not a file", e);
      return null;
    }

    try {
      InputSource inputSource = new InputSource(inputStream);
      inputSource.setPublicId(file.getPath());
      inputSource.setSystemId(file.getAbsolutePath());
      return parseInputSource(inputSource, parse);
    } finally {
      // this method opened the stream, so it is responsible for releasing it
      close(inputStream, file.getAbsolutePath());
    }
  }

  /** parses the domain object from an xml input stream. */
  public Object parseInputStream(InputStream inputStream) {
    return parseInputStream(inputStream, new Parse());
  }

  /** parses the domain object from an xml input stream and collects the problems in the given {@link Parse}.
   * This method does not close the given stream itself.
   * @return the parsed domain object, or null when inputStream is null (a problem is added)
   *   or when {@link #parseInputSource(InputSource, Parse)} returns null.
   */
  public Object parseInputStream(InputStream inputStream, Parse parse) {
    if (inputStream==null) {
      parse.addProblem("couldn't parse inputStream null");
      // stop here: parsing an empty input source would only add a second, misleading problem
      return null;
    }
    return parseInputSource(new InputSource(inputStream), parse);
  }

  // dom to object model parsing methods //////////////////////////////////////

  /** this method builds the object model from the dom tree. This default implementation just
   * delegates to {@link #parseDocumentElement(Element, Parse)} to parse the root element of
   * the document.
   *
   * This method can be overridden for customized behaviour.
   * @return the resulting domain model object.
   */
  public Object parseDocument(Document document, Parse parse) {
    return parseDocumentElement(document.getDocumentElement(), parse);
  }

  /** this method builds the object model from the root element of the dom tree. This default
   * implementation delegates to {@link #parseElement(Element, Parse)}. The resulting
   * object is stored as the document object of the parse by
   * {@link #parseInputSource(InputSource, Parse)}, not by this method.
   *
   * <p>Override this method if you want custom behaviour for parsing the root element
   * in the document without installing a {@link Binding}, which requires a fixed tagName.
   * </p>
   *
   * @return the resulting domain model object.
   */
  public Object parseDocumentElement(Element documentElement, Parse parse) {
    return parseElement(documentElement, parse);
  }

  /** this method parses an arbitrary element in the document without specifying a category.
   * Equivalent to {@link #parseElement(Element, Parse, String) parseElement(element, parse, null)}.
   */
  public Object parseElement(Element element, Parse parse) {
    return parseElement(element, parse, null);
  }

  /** this method parses an arbitrary element in the document based on the bindings in the given category.
   * @param category is the category in which the element should be resolved to a {@link Binding}.
   *   May be null, in which case null is passed on to {@link #getBinding(Element, String)}.
   * @return the object produced by the binding, or null when no binding was found. In that
   *   case a problem is added to the parse.
   */
  public Object parseElement(Element element, Parse parse, String category) {
    Binding binding = getBinding(element, category);
    if (binding!=null) {
      return binding.parse(element, parse, this);
    }

    // the tag name is only needed for the problem description
    String tagName = XmlUtil.getTagName(element);
    parse.addProblem("no element parser for tag "+tagName+(category!=null ? " in category "+category : " in the default category"));
    return null;
  }

  /** implementation of {@link EntityResolver} based on a map of {@link Entity}s.
   * The entity registered for the publicId is tried first. If that doesn't produce an
   * input source, one is created from the systemId. If the systemId is null as well,
   * the failure is logged and null is returned.
   * See also <a href="#entityresolving">section 'Entity resolving'</a>.
   * @see #addEntity(String, Entity)
   */
  public InputSource resolveEntity(String publicId, String systemId) {
    InputSource inputSource = null;

    if (entities!=null) {
      Entity entity = entities.get(publicId);
      if (entity!=null) {
        inputSource = entity.getInputSource();
      }
    }

    if (inputSource==null) {
      if (systemId!=null) {
        // plan b: see if we can build an inputsource from the systemId
        inputSource = new InputSource(systemId);
      } else {
        log.severe("couldn't resolve entity with publicId "+publicId+" and systemId "+systemId);
      }
    }

    return inputSource;
  }

  /**
   * Parse an xml stream and copy the child elements of its document element to the dest element.
   * The document builder is configured with the same error handler and entity resolver
   * as in {@link #parseInputSource(InputSource, Parse)}. Failures are added as problems
   * to the parse. This method does not close the given stream itself.
   * @param inputStream the stream to parse
   * @param dest the element to which the imported elements are appended
   * @param parse collects the problems
   */
  public void importStream(InputStream inputStream, Element dest, Parse parse) {
    // create the document builder
    DocumentBuilderFactory factory = getDocumentBuilderFactory();
    DocumentBuilder documentBuilder = null;
    try {
      documentBuilder = factory.newDocumentBuilder();
    } catch (Exception e) {
      parse.addProblem("couldn't get new document builder", e);
      return;
    }
    // same configuration as for the main document
    setErrorHandler(documentBuilder, parse);
    setEntityResolver(documentBuilder);

    Document importedDocument = null;
    try {
      importedDocument = documentBuilder.parse(inputStream);
    } catch (Exception e) {
      parse.addProblem("couldn't parse xml stream", e);
      return;
    }

    Element importedRoot = importedDocument.getDocumentElement();
    try {
      for (Element child : XmlUtil.elements(importedRoot)) {
        // nodes can't be moved between documents: import a deep copy into dest's document first
        dest.appendChild(dest.getOwnerDocument().importNode(child, true));
      }
    } catch (Exception e) {
      parse.addProblem("couldn't import node from xml stream ", e);
    }
  }

  /** closes a stream that was opened by this parser. A failure to close is logged and
   * not reported as a parse problem, because the parse result is no longer affected by it. */
  private static void close(Closeable closeable, String description) {
    try {
      closeable.close();
    } catch (IOException e) {
      log.log(Level.WARNING, "couldn't close "+description, e);
    }
  }
}