Package com.googlecode.html.parsers

Source Code of com.googlecode.html.parsers.DOMParser

/*
* Copyright 2002-2009 Andy Clark, Marc Guillemot
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/

package com.googlecode.html.parsers;

import org.apache.xerces.xni.Augmentations;
import org.apache.xerces.xni.XNIException;

import com.googlecode.html.HTMLConfiguration;
import com.googlecode.html.xercesbridge.XercesBridge;

/**
* A DOM parser for HTML documents.
*
* @author Andy Clark
*
* @version $Id: DOMParser.java,v 1.5 2005/02/14 03:56:54 andyc Exp $
*/
public class DOMParser
/***/
extends org.apache.xerces.parsers.DOMParser {
   /***
    * // NOTE: It would be better to extend from AbstractDOMParser but // most users will find it
    * easier if the API is just like the // Xerces DOM parser. By extending directly from DOMParser,
    * // users can register SAX error handlers, entity resolvers, // and the like. -Ac extends
    * org.apache.xerces.parsers.AbstractDOMParser { /
    ***/

   //
   // Constructors
   //

   /** Returns the parser's sub-version number. */
   private static int getParserSubVersion() {
      try {
         String VERSION = XercesBridge.getInstance().getVersion();
         int index1 = VERSION.indexOf('.') + 1;
         int index2 = VERSION.indexOf('.', index1);
         if (index2 == -1) {
            index2 = VERSION.length();
         }
         return Integer.parseInt(VERSION.substring(index1, index2));
      } catch (Exception e) {
         return -1;
      }
   } // getParserSubVersion():int

   //
   // XMLDocumentHandler methods
   //

   /** Default constructor. */
   public DOMParser() {
      super(new HTMLConfiguration());
      /*** extending DOMParser ***/
      try {
         setProperty("http://apache.org/xml/properties/dom/document-class-name",
                  "org.apache.html.dom.HTMLDocumentImpl");
      } catch (org.xml.sax.SAXNotRecognizedException e) {
         throw new RuntimeException(
                  "http://apache.org/xml/properties/dom/document-class-name property not recognized");
      } catch (org.xml.sax.SAXNotSupportedException e) {
         throw new RuntimeException(
                  "http://apache.org/xml/properties/dom/document-class-name property not supported");
      }
      /***
       * extending AbstractDOMParser *** fConfiguration.setProperty(
       * "http://apache.org/xml/properties/dom/document-class-name",
       * "org.apache.html.dom.HTMLDocumentImpl"); /
       ***/
   } // <init>()

   //
   // Private static methods
   //

   /** Doctype declaration. */
   public void doctypeDecl(String root, String pubid, String sysid, Augmentations augs)
            throws XNIException {

      // NOTE: Xerces HTML DOM implementation (up to and including
      // 2.5.0) throws a heirarchy request error exception
      // when a doctype node is appended to the tree. So,
      // don't insert this node into the tree for those
      // versions... -Ac

      String VERSION = XercesBridge.getInstance().getVersion();
      boolean okay = true;
      if (VERSION.startsWith("Xerces-J 2.")) {
         okay = getParserSubVersion() > 5;
      }
      // REVISIT: As soon as XML4J is updated with the latest code
      // from Xerces, then this needs to be updated to
      // check XML4J's version. -Ac
      else if (VERSION.startsWith("XML4J")) {
         okay = false;
      }

      // if okay, insert doctype; otherwise, don't risk it
      if (okay) {
         super.doctypeDecl(root, pubid, sysid, augs);
      }

   } // doctypeDecl(String,String,String,Augmentations)

} // class DOMParser
TOP

Related Classes of com.googlecode.html.parsers.DOMParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.