// Source Code of org.jbpm.xml.Parser

/*
 * JBoss, Home of Professional Open Source
 * Copyright 2005, JBoss Inc., and individual contributors as indicated
 * by the @authors tag. See the copyright.txt in the distribution for a
 * full listing of individual contributors.
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */
package org.jbpm.xml;


import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Logger;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.jbpm.PvmException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;


/**
 * XML parser based on JAXP dom parser with simple binding support, error
 * handling and entity resolving.
 *
 * <h2>Purpose</h2>
 * <p>This is a base parser for the common pattern where first JAXP is used
 * to parse xml into a dom object model, and then, this dom object model is
 * examined to build a domain model object.
 * </p>
 *
 * <p>A {@link Parser} is a thread safe object.  For each parse, a new {@link Parse}
 * object is created that contains all information for that parse operation.  The
 * return value of the parse methods is the domain object that is built from the
 * dom model.
 * </p>
 *
 * <h2>Usage</h2>
 * <p>Simplest usage (e.g. in test cases):
 * </p>
 *
 * <pre>
 * static Parser parser = new Parser();
 *
 * void someMethod() {
 *   MyDomainObject mdo = (MyDomainObject) parser.parseXmlString(myXmlString);
 * }
 * </pre>
 *
 * <p>In case you want to collect problems from a parsing operation, use the API
 * like this:
 * </p>
 *
 * <pre>
 * static Parser parser = new Parser();
 *
 * void someMethod() {
 *   Parse parse = new Parse();
 *   parser.parseXmlString(myXmlString, parse);
 *
 *   if (parse.hasProblems()) {
 *     List&lt;Problem&gt; problems = parse.getProblems();
 *     // throw exception, log parse problems or do something else
 *
 *   } else {
 *     // use the parsed domain object
 *     MyDomainObject mdo = (MyDomainObject) parse.getDomainObject();
 *   }
 * }
 * </pre>
 *
 * <h2 id="binding">Binding</h2>
 * <p>Bindings are the link between a certain type of element in your XML document
 * and the corresponding java object in your domain model.</p>
 *
 * <p>A parser can be configured with a set of {@link Binding}s.  Each {@link Binding}
 * knows how to transform a dom element of a given tagName to the corresponding Java
 * object.  {@link Bindings} has a notion of binding categories.  For example, nodes
 * and actions can be seen as different categories in jPDL.
 * </p>
 *
 * <p>The purpose of bindings is to make certain elements in the parsing configurable.
 * E.g. in jPDL, the main structure of the document is fixed.  But node types can be
 * added dynamically.
 * </p>
 *
 * <p>The current {@link Bindings} implementation only supports matching of an
 * element with a {@link Binding} based on tagName.  If you want to take other things
 * into account (e.g. when you want to differentiate between elements of the same
 * tagName with a different attribute value), you can create a specialized
 * {@link Bindings} class.</p>
 *
 * <p>Bindings are added by tagName, but they have to be looked up by element.  That is
 * to support more specialized bindings implementations that match an element with a
 * binding on more information than just the tagName.  In that case, a specialized subclass of
 * {@link Bindings} should be created and the method {@link Bindings#getBinding(Element, String)} and
 * constructor {@link Bindings#Bindings(Bindings)} should be provided
 * with the more specialized matching behaviour.
 * </p>
 *
 * <h2 id="objectstack">Object stack</h2>
 * <p>When implementing {@link Binding}s, you might want to make use of the
 * contextual object stack that is provided on the {@link Parse}.  The
 * {@link Binding} implementations can maintain Java objects on that stack
 * that are being created.
 * </p>
 *
 * <p>E.g. you could push the ProcessDefinition element onto the object stack while it
 * is being parsed like this:
 * </p>
 *
 * <pre>public class MyProcessBinding implements Binding {
 *
 *   public Object parse(Element element, Parse parse, Parser parser) {
 *     <i>// instantiate the object for this binding</i>
 *     MyProcess myProcess = new MyProcess();
 *
 *     <i>// collect all the child elements of element</i>
 *     List&lt;Element&gt; elements = XmlUtil.elements(element);
 *
 *     <i>// push my processDefinition onto the object stack</i>
 *     parse.pushObject(myProcess);
 *     try {
 *
 *       for (Element nodeElement: elements) {
 *         // parse the child elements with the bindings in category "node"
 *         parser.parseElement(nodeElement, parse, "node");
 *       }
 *     } finally {
 *       // make sure my processDefinition is popped.
 *       parse.popObject();
 *     }
 *     return myProcess;
 *   }
 * }
 * </pre>
 *
 * <p>Then, node bindings might access the processDefinition like this:
 * </p>
 *
 * <pre>public class MyNodeBinding implements Binding {
 *
 *   public Object parse(Element element, Parse parse, Parser parser) {
 *     <i>// instantiate the object for this binding</i>
 *     MyNode myNode = new MyNode();
 *
 *     <i>// add the node to the processDefinition</i>
 *     MyProcess myProcess = parse.findObject(MyProcess.class);
 *     myProcess.addNode(myNode);
 *     myNode.setMyProcess(myProcess);
 *
 *     return myNode;
 *   }
 * }
 * </pre>
 *
 * <p>A parser implementation will typically have a static Bindings object that
 * is leveraged in all parser objects.   To customize bindings for such a parser
 * be sure to make a deep copy with {@link Bindings#Bindings(Bindings)} before
 * you start adding more bindings to the specialized parser.  Otherwise the
 * base parser's bindings will be updated as well.
 * </p>
 *
 * <h2>Problems and exceptions</h2>
 *
 * <p>During a parse, no exceptions are thrown. Instead, problems are added
 * to the parse.  That way, the user of the parser can decide what to do with
 * the problems: throw some exception or just log the problems.
 * </p>
 *
 * <p>Only 2 exceptions are thrown by this xml component:
 * <ul>
 *   <li>{@link #addEntity(String, Entity)} throws a PvmException
 *     when the resource could not be resolved to a URL.
 *   </li>
 *   <li>{@link #parseInputSource(InputSource, Parse)} throws a PvmException when
 *     parse is null.
 *   </li>
 * </ul>
 * The third exception thrown is in the {@link UrlEntity} when the resource URL can't
 * be translated into an inputSource.  But that exception should be caught in the
 * {@link #parseInputSource(InputSource, Parse)} method and translated into a problem
 * on the parse.
 * </p>
 *
 * <h2>Reusing {@link Parse}s</h2>
 * <p>In situations where you want to parse multiple documents you can reuse
 * the same parse object.  First, this enables easy collection of all problems in
 * a single {@link Parse} object.  And second, it also allows for pushing contextual
 * objects onto the object stack for all of these parses.
 * </p>
 *
 * <h2 id="entityresolving">Entity resolving</h2>
 * <p>A parser can be configured with a set of entities with the
 * {@link #addEntity(String, Entity)} method.  The {@link UrlEntity} has
 * a convenience method to build entities from resources
 * {@link UrlEntity#UrlEntity(String, ClassLoader)}.
 * </p>
 *
 * <p>When a document builder is created, the default implementation of the
 * {@link #setEntityResolver(DocumentBuilder)} will set this parser as the entity resolver.
 * The implementation method of {@link EntityResolver} ({@link #resolveEntity(String, String)})
 * will use the added {@link Entity}s to try and find a match based on the
 * publicId.  If one is found, the {@link Entity} inputSource is returned, otherwise
 * the systemId is used.
 * </p>
 *
 * <p>This class is intended to be used with aggregation as well as inheritance.
 * </p>
 *
 * @author Tom Baeyens
 */
public class Parser implements EntityResolver {


  private static Logger log = Logger.getLogger(Parser.class.getName());


  protected DocumentBuilderFactory documentBuilderFactory = null;
  protected Map<String, Entity> entities = null;
  protected Bindings bindings = null;
  protected ClassLoader classLoader = null;


  public Parser() {
  }


  /** creates a new Parser with bindings that can be maintained statically in
   * specialized subclasses of Parser.
   */
  public Parser(Bindings bindings) {
    this.bindings = bindings;
  }


  /** creates a new Parser with bindings and entities that can be maintained statically
   * in specialized subclasses of Parser.
   */
  public Parser(Bindings bindings, Map<String, Entity> entities) {
    this.bindings = bindings;
    this.entities = entities;
  }


  // document builder methods /////////////////////////////////////////////////


  /** getter with lazy initialization of the document builder factory.
   * If no document builder factory was set previously with the {@link #setDocumentBuilderFactory(DocumentBuilderFactory)}
   * method, {@link #newDocumentBuilderFactory()} will be called to create one.
   */
  public synchronized DocumentBuilderFactory getDocumentBuilderFactory() {
    if (documentBuilderFactory==null) {
      documentBuilderFactory = newDocumentBuilderFactory();
    }
    return documentBuilderFactory;
  }


  /** setter for the document builder factory */
  public void setDocumentBuilderFactory(DocumentBuilderFactory documentBuilderFactory) {
    this.documentBuilderFactory = documentBuilderFactory;
  }


  /** factory method for {@link DocumentBuilderFactory} during lazy initialization
   * of the documentBuilderFactory.  Can be overridden by subclasses to change
   * the DocumentBuilderFactory implementation or to apply specific configurations. */
  protected DocumentBuilderFactory newDocumentBuilderFactory() {
    return DocumentBuilderFactory.newInstance();
  }


  // entities /////////////////////////////////////////////////////////////////


  /** adds a resolver to the schema catalog.
   * See also <a href="#entityresolving">section 'Entity resolving'</a>.
   */
  public void addEntity(String publicId, Entity entity) {
    if (entities==null) {
      entities = new HashMap<String, Entity>();
    }
    entities.put(publicId, entity);
  }


  /** makes sure that an {@link EntityResolver} is created based on the {@link Entity}s in this parser. 
   * even when none of the {@link #addEntity(String, Entity)} methods are called.
   * This enables addition of entities on a per-{@link Parse} basis when there are no parser-level entities. */
  public void useParseEntityResolver() {
    if (entities==null) {
      entities = new HashMap<String, Entity>();
    }
  }


  // bindings /////////////////////////////////////////////////////////////////


  public Bindings getBindings() {
    return bindings;
  }


  public void setBindings(Bindings bindings) {
    this.bindings = bindings;
  }


  public Binding getBinding(Element element) {
    return getBinding(element, null);
  }


  public Binding getBinding(Element element, String category) {
    return (bindings!=null ? bindings.getBinding(element, category) : null);
  }


  // xml to dom parsing methods ///////////////////////////////////////////////


  /** parses the domain object from an inputSource. */
  public Object parseInputSource(InputSource inputSource) {
    return parseInputSource(inputSource, new Parse());
  }


  /** parses the domain object from an inputSource and collects the problems in the given {@link Parse}.
   * @throws PvmException when parse is null.
   */
  public Object parseInputSource(InputSource inputSource, Parse parse) {
    if (parse==null) {
      throw new PvmException("parse is null");
    }
    if (inputSource==null) {
      parse.addProblem("no inputSource");
      return null;
    }
    // create the document builder
    DocumentBuilderFactory documentBuilderFactory = getDocumentBuilderFactory();
    DocumentBuilder documentBuilder = null;
    try {
      documentBuilder = documentBuilderFactory.newDocumentBuilder();
    } catch (Exception e) {
      parse.addProblem("couldn't get new document builder", e);
      return null;
    }
    setErrorHandler(documentBuilder, parse);
    setEntityResolver(documentBuilder);
    Document document = null;
    try {
      document = documentBuilder.parse(inputSource);
    } catch (Exception e) {
      parse.addProblem("couldn't parse xml document", e);
      return null;
    }
    parse.setDocument(document);
    Object documentObject = parseDocument(document, parse);
    if (documentObject!=null) {
      parse.setDocumentObject(documentObject);
    }
    return documentObject;
  }


  /** sets this parser as the entity resolver on the documentBuilder which
   * uses the {@link #entities} to resolver entities.
   * Override to customize the entity resolver.
   */
  protected void setEntityResolver(DocumentBuilder documentBuilder) {
    documentBuilder.setEntityResolver(this);
  }


  /** sets this parse as the error handler on the documentBuilder which
   * collect all errors as problems in the parse..
   * Override to customize the error handler.
   */
  protected void setErrorHandler(DocumentBuilder documentBuilder, Parse parse) {
    documentBuilder.setErrorHandler(parse);
  }


  /** parses the domain object from an xml string. */
  public Object parseXmlString(String xmlString) {
    return parseXmlString(xmlString, new Parse());
  }


  /** parses the domain object from an xml string and collects the problems in the given {@link Parse}. */
  public Object parseXmlString(String xmlString, Parse parse) {
    if (xmlString==null) {
      parse.addProblem("couldn't parse null xmlString");
      return null;
    }
    byte[] bytes = xmlString.getBytes();
    ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
    InputSource inputSource = new InputSource(bais);
    return parseInputSource(inputSource, parse);
  }


  /** parses the domain object from a resource.
   * See {@link #parseResource(String, ClassLoader, Parse)} for more information on how the
   * resource is loaded.  The default value for the classLoader is null.
   */
  public Object parseResource(String resource) {
    return parseResource(resource, null);
  }


  /** parses the domain object from an xml resource and a classLoader.
   * See {@link #parseResource(String, ClassLoader, Parse)} for more information on how the
   * resource is loaded.
   */
  public Object parseResource(String resource, ClassLoader classLoader) {
    return parseResource(resource, classLoader, new Parse());
  }


  /** parses the domain object from an xml resource and a classLoader.
   * The resource is loaded from the classLoader as a stream.  This means
   * that resources should be visible to the classloader and they should be
   * referenced relative to the root of the classpath.  E.g.
   * <code>org/jbpm/MyResourceFile.xml</code>
   * If the classLoader is null, the classloader of this class ({@link Parser})
   * is used.
   */
  public Object parseResource(String resource, ClassLoader classLoader, Parse parse) {
    if (resource==null) {
      parse.addProblem("couldn't parse resource null");
      return null;
    }
    this.classLoader = classLoader;
    InputStream inputStream = null;
    inputStream = getClassLoader().getResourceAsStream(resource);
    if (inputStream==null) {
      parse.addProblem("resource "+resource+" doesn't exist");
      return null;
    }
    InputSource inputSource = new InputSource(inputStream);
    inputSource.setPublicId(resource);
    return parseInputSource(inputSource, parse);
  }


  /** returns the given classLoader ({@link #parseResource(String, ClassLoader)} or
   * the current context class loader */
  public ClassLoader getClassLoader() {
    if (classLoader!=null) {
      return classLoader;
    }
    return Thread.currentThread().getContextClassLoader();
  }


  /** parses the domain object from an xml file. */
  public Object parseFile(File file) {
    return parseFile(file, new Parse());
  }


  /** parses the domain object from an xml file and collects the problems in the given {@link Parse}. */
  public Object parseFile(File file, Parse parse) {
    if (file==null) {
      parse.addProblem("couldn't parse file null");
      return null;
    }
    FileReader fileReader = null;
    try {
      fileReader = new FileReader(file);
    } catch (FileNotFoundException e) {
      parse.addProblem(file.getAbsolutePath()+" is not a file", e);
      return null;
    }
    InputSource inputSource = new InputSource(fileReader);
    inputSource.setPublicId(file.getPath());
    inputSource.setSystemId(file.getAbsolutePath());
    return parseInputSource(inputSource, parse);
  }


  /** parses the domain object from an xml input stream. */
  public Object parseInputStream(InputStream inputStream) {
    return parseInputStream(inputStream, new Parse());
  }


  /** parses the domain object from an xml input stream and collects the problems in the given {@link Parse}. */
  public Object parseInputStream(InputStream inputStream, Parse parse) {
    if (inputStream==null) {
      parse.addProblem("couldn't parse inputStream null");
    }
    return parseInputSource(new InputSource(inputStream), parse);
  }


  // dom to object model parsing methods //////////////////////////////////////


  /** this method builds the object model from the dom tree.  This default implementation just
   * delegates to {@link #parseDocumentElement(Element, Parse)} to parse the root element of
   * the document.
   *
   * This method can be overridden for customized behaviour.
   * @return the resulting domain model object.
   */
  public Object parseDocument(Document document, Parse parse) {
    return parseDocumentElement(document.getDocumentElement(), parse);
  }


  /** this method builds the object model from the dom tree.  This default implementation
   * will delegate to the {@link #parseElement(Element, Parse)} and then put the resulting
   * object as the {@link Parse#getDocumentObject()}.
   *
   * <p>Override this method if you want custom behaviour for parsing the root element
   * in the document without installing a {@link Binding} requires a fixed tagName.
   * </p>
   *
   * @return the resulting domain model object.
   */
  public Object parseDocumentElement(Element documentElement, Parse parse) {
    return parseElement(documentElement, parse);
  }


  /** this method parses an arbitrary element in the document with the first matching binding found
   * using any of the categories.
   */
  public Object parseElement(Element element, Parse parse) {
    return parseElement(element, parse, null);
  }


  /** this method parses an arbitrary element in the document based on the bindings in the given category.
   * @param category is the category in which the tagName should be resolved to an ElementHandler.  If
   *   category is null, all the categories will be scanned for an appropriate binding in random order. */
  public Object parseElement(Element element, Parse parse, String category) {


    Object object = null;
    String tagName = XmlUtil.getTagName(element);


    Binding binding = getBinding(element, category);


    if (binding!=null) {
      object = binding.parse(element, parse, this);
    } else {
      parse.addProblem("no element parser for tag "+tagName+(category!=null ? " in category "+category : " in the default category"));
    }


    return object;
  }


  /** implementation of {@link EntityResolver} based on a map of {@link Entity}s.
   * See also <a href="entityresolving">section 'Entity resolving'</a>.
   * @see #addEntity(String, Entity)
   */
  public InputSource resolveEntity(String publicId, String systemId) {
    InputSource inputSource = null;
    if (entities!=null) {
      Entity entity = entities.get(publicId);
      if (entity!=null) {
        inputSource = entity.getInputSource();
      }
    }
    if (inputSource==null) {
      if (systemId!=null) {
        // plan b: see if we can build an inputsource from the systemId
        inputSource = new InputSource(systemId);
      } else {
        log.severe("couldn't resolve entity with publicId "+publicId+" and systemId "+systemId);
      }
    }
    return inputSource;
  }
  
  /**
   * Parse an xml stream from a resource and copy the children of the document element to the dest element.  
   * @param inputStream the stream to parse
   * @param dest the element where the result will be append
   * @param parse
   */
  public void importStream(InputStream inputStream, Element dest, Parse parse) {
    // create the document builder
    DocumentBuilderFactory documentBuilderFactory = getDocumentBuilderFactory();
    DocumentBuilder documentBuilder = null;
    try {
      documentBuilder = documentBuilderFactory.newDocumentBuilder();
    } catch (Exception e) {
      parse.addProblem("couldn't get new document builder", e);
      return;
    }
    Document document = null;
    try {
      document = documentBuilder.parse(inputStream);
    } catch (Exception e) {
      parse.addProblem("couldn't parse xml stream", e);
      return;
    }
    Element n = document.getDocumentElement();
    try {
      for(Element e : XmlUtil.elements(n)) {
        dest.appendChild(dest.getOwnerDocument().importNode(e, true));
      }
    } catch (Exception e) {
      parse.addProblem("couldn't import node from xml stream ", e);
    }
  }


}
// Source Code of org.jbpm.xml.Parser

// Related Classes of org.jbpm.xml.Parser