Package com.sun.xml.xhtml

Source Code of com.sun.xml.xhtml.HtmlSaxParser$DeferredException

/*
* $Id: HtmlSaxParser.java,v 1.3 1999/04/04 19:34:43 db Exp $
*
* Copyright (c) 1999 Sun Microsystems, Inc. All Rights Reserved.
*
* This software is the confidential and proprietary information of Sun
* Microsystems, Inc. ("Confidential Information").  You shall not
* disclose such Confidential Information and shall use it only in
* accordance with the terms of the license agreement you entered into
* with Sun.
*
* SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF THE
* SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
* IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
* PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR ANY DAMAGES
* SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING
* THIS SOFTWARE OR ITS DERIVATIVES.
*/

package com.sun.xml.xhtml;

import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.Reader;

import java.util.Enumeration;
import java.util.Locale;

import javax.swing.text.BadLocationException;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTMLEditorKit.ParserCallback;
import javax.swing.text.html.HTML.Tag;
import javax.swing.text.html.parser.*;

import org.xml.sax.*;
import org.xml.sax.helpers.AttributeListImpl;

import com.sun.xml.parser.LexicalEventListener;
import com.sun.xml.parser.Resolver;

// debug only:
// import com.sun.xml.tree.XmlDocumentBuilder;


/**
* This is a SAX parser which processes HTML, producing an event stream
* corresponding roughly to what would be produced when parsing a
* well formed (but not in general valid) XHTML document.  It may be
* useful when beginning to convert HTML content into (valid) XHTML
* content, or when using DOM APIs to manipulate such HTML content.
*
* <P> The parser uses the HTML parser built into the SWING library, and
* converts the events it reports into SAX event callbacks.  At this writing,
* that parser understands HTML 3.2, a number of HTML 4.0 constructs at
* least in part, and recovers from many common HTML errors in a manner
* which is compatible with the handling of popular web browsers.
*
* <P> Note that the case of element and attribute names exposed by
* this parser is always <em>normalized to lower case</em>.  This policy
* is the one adopted by current W3C working drafts of <em>XHTML</em>.
* It differs from that adopted by the Level 1 DOM HTML support, which
* normalizes element names to uppercase and uppercases only the initial
* letter of attribute names.
*
* <P> Also, note that when writing out HTML, you may want to avoid
* character encodings such as UTF-8, since HTML processors are less
* consistent in their support of internationalized text than are XML
* processors.  ISO-8859-1 is safe, and newer HTML browsers tend to
* handle other character encodings better than older ones.
*
* @see com.sun.xml.parser.LexicalEventListener
* @see com.sun.xml.tree.XmlDocumentBuilder
* @see javax.swing.text.html.parser.ParserDelegator
*
* @author David Brownell
* @version $Revision: 1.3 $
*/
public class HtmlSaxParser
    extends  ParserCallback
    implements  org.xml.sax.Parser
{
    // Swing HTML parser that actually reads the document; created once in
    // the constructor and driven from parse().
    private ParserDelegator    parser;
    // Single SAX attribute list, cleared and refilled by toAttributeList()
    // for every element that is reported.
    private AttributeListImpl    attributes;
    // Set once a handler has thrown and the failure has been wrapped in a
    // DeferredException; the content callbacks return immediately while set.
    private boolean      disabled;

    // Receives the element and character-data events.
    private DocumentHandler    docHandler;
    // Stored by setDTDHandler(); not read anywhere in the visible code.
    private DTDHandler      dtdHandler;
    // Receives warnings (from handleError) and fatal errors (from parse).
    private ErrorHandler    errHandler;
    // Stored by setEntityResolver(); not read anywhere in the visible code.
    private EntityResolver    resolver;
    // Stored by setLocale(); not read anywhere in the visible code.
    private Locale      locale;
    // The document handler again, when it also implements
    // LexicalEventListener; otherwise null.  Used to report comments.
    private LexicalEventListener  lexicalHandler;

   
    /**
     * Constructs a SAX parser accessing the HTML parser built in to
     * the Swing subsystem.
     */
    public HtmlSaxParser ()
    {
  HandlerBase base = new HandlerBase ();

  docHandler = base;
  dtdHandler = base;
  errHandler = base;
  resolver = base;

  parser = new ParserDelegator ();
  attributes = new AttributeListImpl ();
    }


    // SAX parser methods

    /**
     * Parses the HTML document provided through the specified
     * input source.
     */
    public void parse (InputSource in) throws SAXException, IOException
    {
  Reader  reader = in.getCharacterStream ();

  if (reader == null) {
      InputStream  input = in.getByteStream ();

      //
      // XXX must check the character set handling here,
      // along both paths !!!!
      //
      if (input != null) {
    String encoding = in.getEncoding ();
    if (encoding == null)
        encoding = "8859_1";
    reader = new InputStreamReader (input, encoding);

      } else if (in.getSystemId () != null) {
    in = Resolver.createInputSource (
        new java.net.URL (in.getSystemId ()), false);
    reader = in.getCharacterStream ();

      } else
    throw new SAXException (
        "Bad InputSource: no Reader, InputStream, or URI");
  }

  docHandler.startDocument ();
  try {
      parser.parse (reader, this, true);
  } catch (DeferredException x) {
      if (x.ioe != null)
    throw x.ioe;
      if (x.x instanceof SAXParseException)
    errHandler.fatalError ((SAXParseException) x.x);
      throw x.x;
  }
  docHandler.endDocument ();
    }


    /**
     * Parses the HTML document provided at the specified URI.
     */
    public void parse (String uri) throws SAXException, IOException
    {
  parse (new InputSource (uri));
    }

    /**
     * Assigns the document handler through which all HTML content will
     * be reported.  This is the primary application interface to this
     * parser.
     *
     * <P> If this handler implements the lexical event listener interface,
     * it will also be notified of comments.
     */
    public void setDocumentHandler (DocumentHandler handler)
    {
  docHandler = handler;
  if (handler instanceof LexicalEventListener)
      lexicalHandler = (LexicalEventListener) handler;
  else
      lexicalHandler = null;
    }

    /**
     * Not useful with any HTML parser, since HTML does not use
     * notations or unparsed entities.
     */
    public void setDTDHandler (DTDHandler handler)
  { dtdHandler = handler; }

    /**
     * Not useful with this HTML parser, since it does not read
     * doctype declarations.
     */
    public void setEntityResolver (EntityResolver resolver)
  { this.resolver = resolver; }

    /**
     * Provides the error handler used to report fatal errors
     * and warnings.
     */
    public void setErrorHandler (ErrorHandler handler)
  { errHandler = handler; }

    /**
     * Not useful with this HTML parser, since no control over the
     * language of its diagnostics is provided by the Java runtime.
     */
    public void setLocale (Locale locale)
  { this.locale = locale; }
   
   
    // Swing HTML ParserCallback methods

    /**
     * Not intended for application use.
     * This is a Swing HTML ParserCallback method.
     */
    public void flush () throws BadLocationException
    {
        // Intentionally empty: the other callbacks in this class hand each
        // event straight to the SAX handlers, so nothing is held back here.
    }

    /**
     * Not intended for application use.
     * This is a Swing HTML ParserCallback method.
     */
    public void handleText (char data [], int pos)
    {
  if (disabled)
      return;

  try {
      // System.out.println ("text");
      docHandler.characters (data, 0, data.length);
  } catch (SAXException e) {
      DeferredException x = new DeferredException ();
      x.x = e;
      disabled = true;
      throw x;
  }
    }

    /**
     * Not intended for application use.
     * This is a Swing HTML ParserCallback method.
     */
    public void comment (char data [], int pos)
    {
  if (disabled || lexicalHandler == null)
      return;

  try {
      // System.out.println ("comment");
      lexicalHandler.comment (new String (data));
  } catch (SAXException e) {
      DeferredException x = new DeferredException ();
      x.x = e;
      disabled = true;
      throw x;
  }
    }

    /**
     * Not intended for application use.
     * This is a Swing HTML ParserCallback method.
     */
    public void handleStartTag (
      Tag      tag,
      MutableAttributeSet   attributes,
  int      pos
    ) {
  if (disabled)
      return;

  try {
      // System.out.println ("start: " + tag.toString ());
      docHandler.startElement (tag.toString (),
            toAttributeList (attributes));
  } catch (SAXException e) {
      DeferredException x = new DeferredException ();
      x.x = e;
      disabled = true;
      throw x;
  }
    }

    /**
     * Not intended for application use.
     * This is a Swing HTML ParserCallback method.
     */
    public void handleEndTag (
      Tag      tag,
  int      pos
    ) {
  if (disabled)
      return;

  try {
      // System.out.println ("end: " + tag.toString ());
      docHandler.endElement (tag.toString ());
  } catch (SAXException e) {
      DeferredException x = new DeferredException ();
      x.x = e;
      disabled = true;
      throw x;
  }
    }

    /**
     * Not intended for application use.
     * This is a Swing HTML ParserCallback method.
     */
    public void handleSimpleTag (
      Tag      tag,
      MutableAttributeSet   attributes,
  int      pos
    ) {
  if (disabled)
      return;

  try {
      String  tagName = tag.toString ();

      // System.out.println ("start simple: " + tagName);
      docHandler.startElement (tagName, toAttributeList (attributes));
      // System.out.println ("end simple: " + tagName);
      docHandler.endElement (tagName);
  } catch (SAXException e) {
      DeferredException x = new DeferredException ();
      x.x = e;
      disabled = true;
      throw x;
  }
    }

    /**
     * Not intended for application use.
     * This is a Swing HTML ParserCallback method.
     */
    public void handleError (String diagnostic, int pos)
    {
  try {
      // System.out.println ("ERR: " + diagnostic);
      errHandler.warning (new SAXParseException (
    diagnostic,
    null, null,
    -1, -1
    ));
  } catch (SAXException e) {
      DeferredException x = new DeferredException ();
      x.x = e;;
      disabled = true;
      throw x;
  }
    }

    //
    // Convert Swing's model of an attribute set to SAX's.
    //
    private AttributeListImpl toAttributeList (MutableAttributeSet attrs)
    {
  attributes.clear ();
  if (attrs.getAttributeCount () != 0) {
      for (Enumeration e = attrs.getAttributeNames ();
        e.hasMoreElements ();
        ) {
    Object  key = e.nextElement ();
    String  name = key.toString ();
    String  value = attrs.getAttribute (key).toString ();

    attributes.addAttribute (name, "CDATA", value);
      }
  }
  return attributes;
    }

    //
    // We wrap an exception (only one!) in this class and throw them.
    // The HotJava parser seems to continue reporting events after we
    // throw ... so remember to disable ongoing reporting!!
    //
    static class DeferredException extends RuntimeException
    {
  // The SAX exception thrown by an application handler; filled in by the
  // callback that caught it and rethrown by parse().
  SAXException  x;
  // An I/O failure to rethrow from parse() in preference to "x".
  // Nothing in the visible code assigns this field.
  IOException  ioe;
    }


/*
    // TESTING ONLY

    public static void main (String argv [])
    {
  HtmlSaxParser    parser = new HtmlSaxParser ();
  XmlDocumentBuilder  builder = new XmlDocumentBuilder ();

  try {
      parser.setDocumentHandler (builder);
      parser.parse (argv [0]);
      builder.getDocument ().write (System.out);

  } catch (Throwable t) {
      t.printStackTrace ();
  }
    }
*/
TOP

Related Classes of com.sun.xml.xhtml.HtmlSaxParser$DeferredException

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.