// Package org.allcolor.xml.parser
//
// Source code of org.allcolor.xml.parser.CXmlParser

/*
* Copyright (C) 2005 by Quentin Anciaux
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Library General Public License as published by
* the Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License
* for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this library; if not, write to the Free Software Foundation,
* Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
*  @author Quentin Anciaux
*/
package org.allcolor.xml.parser;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.Reader;
import java.net.URL;

import org.allcolor.dtd.parser.CDTDParser;
import org.allcolor.dtd.parser.CDocType;
import org.allcolor.dtd.parser.CNotation;
import org.allcolor.xml.parser.dom.CAttr;
import org.allcolor.xml.parser.dom.CDom2HTMLDocument;
import org.w3c.dom.Notation;
import org.xml.sax.SAXException;

/**
* The Xml parser implementation.
*
* @author Quentin Anciaux
*/
public final class CXmlParser {
  /** DOCUMENT ME! */
  public static final CDocType dtFr = CXmlParser.loadDTD("/dtd-fr.ser");

  // dtdparser.parse("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0
  // Transitional//EN\"
  // \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">","http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd");
  // dtdparser.parse("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0
  // Frameset//EN\"
  // \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd\">","http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd");
  /** DOCUMENT ME! */
  public static final CDocType dtTr = CXmlParser.loadDTD("/dtd-tr.ser");

  static {
    try {
      System.setProperty("sun.net.client.defaultConnectTimeout", "60000");
      System.setProperty("sun.net.client.defaultReadTimeout", "60000");
      System.setProperty("file.encoding", "utf-8");
    } // end try
    catch (final Throwable ignore) {
      final Throwable t = ignore;
      if (t.getClass() == ThreadDeath.class) {
        throw (ThreadDeath) t;
      }
      Throwable cause = ignore.getCause();
      while (cause != null) {
        if (cause.getClass() == ThreadDeath.class) {
          throw (ThreadDeath) cause;
        }
        cause = cause.getCause();
      }
    }
  } // end static

  /**
   * DOCUMENT ME!
   *
   * @param toTest
   *            DOCUMENT ME!
   * @param tester
   *            DOCUMENT ME!
   *
   * @return DOCUMENT ME!
   */
  private static boolean compareByteArray(final byte toTest[],
      final byte tester[]) {
    if (toTest.length < tester.length) {
      return false;
    } // end if

    for (int i = 0; i < tester.length; i++) {
      if (toTest[i] != tester[i]) {
        return false;
      } // end if
    } // end for

    return true;
  } // end compareByteArray()

  /**
   * DOCUMENT ME!
   *
   * @param in
   *            DOCUMENT ME!
   *
   * @return DOCUMENT ME!
   *
   * @throws IOException
   *             DOCUMENT ME!
   */
  public static Reader getReader(final InputStream in) throws IOException {
    final CInputStreamBuffer buffer = new CInputStreamBuffer(in);
    buffer.mark(-1);

    byte bbuffer[] = new byte[2048];
    final ByteArrayOutputStream bout = new ByteArrayOutputStream();
    int inb = -1;
    int count = 0;

    while (((inb = buffer.read(bbuffer)) != -1) && (count < 2048)) {
      bout.write(bbuffer, 0, inb);
      count += inb;
    } // end while

    bbuffer = bout.toByteArray();

    final int length = bbuffer.length;
    buffer.reset();

    char cbuffer[] = new char[length];
    int index = 0;
    String preEnc = null;

    if (CXmlParser.compareByteArray(bbuffer, new byte[] { 0x4c, 0x6f,
        (byte) 0xa7, (byte) 0x94 })) {
      // EBCDIC ?
      preEnc = "Cp1047";
      cbuffer = new String(bbuffer, "Cp1047").toCharArray();
      index = cbuffer.length;
    } // end if
    else if (CXmlParser.compareByteArray(bbuffer, new byte[] { 0, 0,
        (byte) 0xfe, (byte) 0xff })) {
      preEnc = "UTF-32BE";
      cbuffer = new String(bbuffer, "UTF-32BE").toCharArray();
      index = cbuffer.length;
    } // end else if
    else if (CXmlParser.compareByteArray(bbuffer, new byte[] { (byte) 0xff,
        (byte) 0xfe, 0, 0 })) {
      preEnc = "UTF-32LE";
      cbuffer = new String(bbuffer, "UTF-32LE").toCharArray();
      index = cbuffer.length;
    } // end else if
    else if (CXmlParser.compareByteArray(bbuffer, new byte[] { (byte) 0xfe,
        (byte) 0xff })) {
      preEnc = "UTF-16BE";
      cbuffer = new String(bbuffer, "UTF-16BE").toCharArray();
      index = cbuffer.length;
    } // end else if
    else if (CXmlParser.compareByteArray(bbuffer, new byte[] { (byte) 0xff,
        (byte) 0xfe })) {
      preEnc = "UTF-16LE";
      cbuffer = new String(bbuffer, "UTF-16LE").toCharArray();
      index = cbuffer.length;
    } // end else if
    else if (CXmlParser.compareByteArray(bbuffer, new byte[] { 0, 0, 0,
        0x3c })) {
      preEnc = "UTF-32BE";
      cbuffer = new String(bbuffer, "UTF-32BE").toCharArray();
      index = cbuffer.length;
    } // end else if
    else if (CXmlParser.compareByteArray(bbuffer, new byte[] { 0x3c, 0, 0,
        0 })) {
      preEnc = "UTF-32LE";
      cbuffer = new String(bbuffer, "UTF-32LE").toCharArray();
      index = cbuffer.length;
    } // end else if
    else if (CXmlParser.compareByteArray(bbuffer, new byte[] { 0, 0x3c, 0,
        0x3f })) {
      preEnc = "UTF-16BE";
      cbuffer = new String(bbuffer, "UTF-16BE").toCharArray();
      index = cbuffer.length;
    } // end else if
    else if (CXmlParser.compareByteArray(bbuffer, new byte[] { 0x3c, 0,
        0x3f, 0 })) {
      preEnc = "UTF-16LE";
      cbuffer = new String(bbuffer, "UTF-16LE").toCharArray();
      index = cbuffer.length;
    } // end else if
    else if (CXmlParser.compareByteArray(bbuffer, new byte[] { (byte) 0xef,
        (byte) 0xbb, (byte) 0xbf })) {
      preEnc = "UTF-8";
      cbuffer = new String(bbuffer, "UTF-8").toCharArray();
      index = cbuffer.length;
    } // end else if
    else {
      for (int i = 0; i < length; i++) {
        if (bbuffer[i] > 0) {
          cbuffer[index++] = (char) bbuffer[i];
        } // end if
      } // end for
    } // end else

    final String value = new String(cbuffer, 0, index);

    if (value.indexOf("<?xml ", 0) != -1) {
      String pi = value.substring(value.indexOf("<?xml ", 0));

      if (pi.indexOf("?>", 0) != -1) {
        pi = pi.substring(0, pi.indexOf("?>", 0)) + 2;
      } // end if

      int indexEncoding = pi.indexOf("encoding=\"", 0);

      if (indexEncoding != -1) {
        String encoding = pi.substring(indexEncoding + 10);
        indexEncoding = encoding.indexOf("\"", 0);

        if (indexEncoding != -1) {
          encoding = encoding.substring(0, indexEncoding);
        } // end if

        try {
          if (!"utf-8".equalsIgnoreCase(preEnc)) {
            "test".getBytes(preEnc);

            return new CInputStreamReader(buffer, preEnc);
          } // end if
        } // end try
        catch (final Exception ignore) {
          preEnc = "utf-8";
        } // end catch

        try {
          "test".getBytes(encoding);

          return new CInputStreamReader(buffer, encoding);
        } // end try
        catch (final Exception ignore) {
        } // end catch

        return new CInputStreamReader(buffer, preEnc);
      } // end if

      indexEncoding = pi.indexOf("encoding='", 0);

      if (indexEncoding != -1) {
        String encoding = pi.substring(indexEncoding + 10);
        indexEncoding = encoding.indexOf("'", 0);

        if (indexEncoding != -1) {
          encoding = encoding.substring(0, indexEncoding);
        } // end if

        try {
          if (!"utf-8".equalsIgnoreCase(preEnc)) {
            "test".getBytes(preEnc);

            return new CInputStreamReader(buffer, preEnc);
          } // end if
        } // end try
        catch (final Exception ignore) {
          preEnc = "utf-8";
        } // end catch

        try {
          "test".getBytes(encoding);

          return new CInputStreamReader(buffer, encoding);
        } // end try
        catch (final Exception ignore) {
        }

        return new CInputStreamReader(buffer, preEnc);
      } // end if

      try {
        "test".getBytes(preEnc);

        return new CInputStreamReader(buffer, preEnc);
      } // end try
      catch (final Exception ignore) {
        preEnc = "utf-8";
      } // end catch

      return new CInputStreamReader(buffer, preEnc);
    } // end if

    int indexEncoding = value.indexOf("charset=", 0);

    if (indexEncoding != -1) {
      String encoding = value.substring(indexEncoding + 8);
      indexEncoding = encoding.indexOf("\"", 0);

      if (indexEncoding != -1) {
        encoding = encoding.substring(0, indexEncoding);
      } // end if
      else {
        indexEncoding = encoding.indexOf("'", 0);

        if (indexEncoding != -1) {
          encoding = encoding.substring(0, indexEncoding);
        } // end if
      } // end else

      try {
        if (!"utf-8".equalsIgnoreCase(preEnc)) {
          "test".getBytes(preEnc);

          return new CInputStreamReader(buffer, preEnc);
        } // end if
      } // end try
      catch (final Exception ignore) {
        preEnc = "utf-8";
      } // end catch

      try {
        "test".getBytes(encoding);

        return new CInputStreamReader(buffer, encoding);
      } // end try
      catch (final Exception ignore) {
      }

      return new CInputStreamReader(buffer, preEnc);
    } // end if

    try {
      "test".getBytes(preEnc);

      return new CInputStreamReader(buffer, preEnc);
    } // end try
    catch (final Exception ignore) {
      preEnc = "utf-8";
    } // end catch

    return new CInputStreamReader(buffer, preEnc);
  } // end getReader()

  /**
   * DOCUMENT ME!
   *
   * @param name
   *            DOCUMENT ME!
   *
   * @return DOCUMENT ME!
   */
  public static CDocType loadDTD(final String name) {
    try {
      final ObjectInputStream in = new ObjectInputStream(CXmlParser.class
          .getResourceAsStream(name));
      final CDocType dt = (CDocType) in.readObject();
      in.close();

      return dt;
    } // end try
    catch (final Throwable e) {
      final Throwable t = e;
      if (t.getClass() == ThreadDeath.class) {
        throw (ThreadDeath) t;
      }
      Throwable cause = e.getCause();
      while (cause != null) {
        if (cause.getClass() == ThreadDeath.class) {
          throw (ThreadDeath) cause;
        }
        cause = cause.getCause();
      }
      // System.err.println("reading local dtd if found...");
      if ("/dtd-tr.ser".equals(name)) {
        final URL u = CXmlParser.class.getResource("/dtd-tr.dtd");

        if (u != null) {
          return CDTDParser
              .parse(
                  "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">",
                  u.toString(), new CDom2HTMLDocument());
        } // end if
        else {
          return CDTDParser
              .parse(
                  "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">",
                  "http://www.w3.org/TRxhtml1/DTD/xhtml1-transitional.dtd",
                  new CDom2HTMLDocument());
        } // end else
      } // end if
      else if ("/dtd-fr.ser".equals(name)) {
        final URL u = CXmlParser.class.getResource("/dtd-fr.dtd");

        if (u != null) {
          return CDTDParser
              .parse(
                  "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Frameset//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd\">",
                  u.toString(), new CDom2HTMLDocument());
        } // end if
        else {
          return CDTDParser
              .parse(
                  "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Frameset//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd\">",
                  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd",
                  new CDom2HTMLDocument());
        } // end else
      } // end else if

      return null;
    } // end catch
  } // end loadDTD()

  /**
   * DOCUMENT ME!
   *
   * @param c
   *            DOCUMENT ME!
   *
   * @return DOCUMENT ME!
   */
  public static final boolean VALID_XML11_FIRST_LETTERS(final char c) {
    if ((c >= 'A') && (c <= 'Z')) {
      return true;
    } // end if

    if ((c >= 'a') && (c <= 'z')) {
      return true;
    } // end if

    if ((c == '_') || (c == ':')) {
      return true;
    } // end if

    if ((c >= 0x00C0) && (c <= 0x00D6)) {
      return true;
    } // end if

    if ((c >= 0x00D8) && (c <= 0x00F6)) {
      return true;
    } // end if

    if ((c >= 0x00F8) && (c <= 0x02FF)) {
      return true;
    } // end if

    if ((c >= 0x0370) && (c <= 0x037D)) {
      return true;
    } // end if

    if ((c >= 0x037F) && (c <= 0x1FFF)) {
      return true;
    } // end if

    if ((c >= 0x200C) && (c <= 0x200D)) {
      return true;
    } // end if

    if ((c >= 0x2070) && (c <= 0x218F)) {
      return true;
    } // end if

    if ((c >= 0x2C00) && (c <= 0x2FEF)) {
      return true;
    } // end if

    if ((c >= 0x3001) && (c <= 0xD7FF)) {
      return true;
    } // end if

    if ((c >= 0xF900) && (c <= 0xFDCF)) {
      return true;
    } // end if

    if ((c >= 0xFDF0) && (c <= 0xFFFD)) {
      return true;
    } // end if

    if ((c >= 0x10000) && (c <= 0xEFFFF)) {
      return true;
    } // end if

    return false;
  } // end VALID_XML11_FIRST_LETTERS()

  /**
   * Case-insensitivity flag in effect for the current parse. It is set by
   * the constructor and reset from <code>caseInsensitiveBackup</code> each
   * time <code>parse</code> starts.
   */
  private boolean caseInsensitive = false;

  /**
   * The value given to the constructor, kept so that <code>parse</code> can
   * restore <code>caseInsensitive</code> at the start of every run.
   */
  private boolean caseInsensitiveBackup = false;

  /**
   * Document type consulted by <code>canHaveChild</code>; cleared to
   * <code>null</code> when <code>parse</code> starts. While it is null,
   * <code>canHaveChild</code> answers <code>true</code> for every element.
   */
  private CDocType dt = null;

  /**
   * Set to <code>true</code> when <code>parse</code> starts.
   * NOTE(review): presumably cleared once the first tag has been handled -
   * confirm in the tag-handling code.
   */
  private boolean firstTag = false;

  /**
   * Set to <code>false</code> when <code>parse</code> starts.
   * NOTE(review): presumably switched on when the input turns out to be
   * HTML - confirm where it is assigned.
   */
  protected boolean htmlDocument = false;

  // Reader currently being parsed; assigned at the start of parse.
  private Reader in = null;

  // -1 when parse starts. parse stores a character here (for instance a '<'
  // it has read ahead) before going back to its main loop.
  // NOTE(review): presumably a one-character push-back consumed by read() -
  // confirm.
  private int iprev = -1;

  /**
   * Handler notified by <code>parse</code>: encoding, document start, text,
   * processing instructions and comments are reported to it.
   */
  private final IParseHandler parser;

  // Scratch buffer reused by parse for tag names, attribute names and
  // values, processing-instruction content and comment content.
  private final CStringBuilder tagcontent = new CStringBuilder(128);

  /**
   * Creates a new CXmlParser object.
   *
   * @param caseInsensitive
   *            DOCUMENT ME!
   * @param parser
   *            DOCUMENT ME!
   */
  public CXmlParser(final boolean caseInsensitive, final IParseHandler parser) {
    super();
    this.caseInsensitive = caseInsensitive;
    this.caseInsensitiveBackup = caseInsensitive;
    this.parser = parser;
  } // end CXmlParser()

  /**
   * DOCUMENT ME!
   *
   * @param elemName
   *            DOCUMENT ME!
   *
   * @return DOCUMENT ME!
   */
  private boolean canHaveChild(final String elemName) {
    if (this.dt == null) {
      return true;
    } // end if

    final Notation nt = (Notation) this.dt.getNotations().getNamedItem(
        elemName);

    try {
      final CNotation cnt = (CNotation) nt;

      return cnt.getElement().getValidNodes().size() > 0;
    } // end try
    catch (final Throwable ignore) {
      final Throwable t = ignore;
      if (t.getClass() == ThreadDeath.class) {
        throw (ThreadDeath) t;
      }
      Throwable cause = ignore.getCause();
      while (cause != null) {
        if (cause.getClass() == ThreadDeath.class) {
          throw (ThreadDeath) cause;
        }
        cause = cause.getCause();
      }
    }

    return true;
  } // end canHaveChild()

  /**
   * DOCUMENT ME!
   *
   * @param in
   *            DOCUMENT ME!
   *
   * @throws SAXException
   *             DOCUMENT ME!
   */
  public void parse(final Reader in) {
    try {
      this.htmlDocument = false;
      this.caseInsensitive = this.caseInsensitiveBackup;
      this.firstTag = true;
      this.dt = null;

      boolean element = false;
      boolean cdata = false;
      boolean doctype = false;
      boolean pi = false;
      boolean comment = false;
      this.in = in;
      this.iprev = -1;
      int ic = -1;

      if (in.getClass() == CInputStreamReader.class) {
        this.parser
            .setEncoding(((CInputStreamReader) in).getEncoding());
      } // end if

      this.parser.documentStart();

      while ((ic = this.read()) != -1) {
        final char c = (char) ic;
        ic = this.read();

        if (ic != -1) {
          char n = (char) ic;

          if (c == '<') {
            if (n == '?') {
              pi = true;
            } // end if
            else if (n == '!') {
              ic = this.read();

              if (ic == -1) {
                break;
              } // end if

              n = (char) ic;

              if (n == '-') {
                comment = true;
              } // end if
              else if (n == '[') {
                cdata = true;
              } // end else if
              else if ((n == 'd') || (n == 'D')) {
                doctype = true;
              } // end else if
            } // end else if
            else {
              if (CXmlParser.VALID_XML11_FIRST_LETTERS(n)
                  || (n == '/')) {
                element = true;
              } // end if
            } // end else
          } // end if
          else if (n == '<') {
            this.parser.parseText("" + c, false);
            this.iprev = n;
            // struct.hasprev = true;

            continue;
          } // end else if

          if (element) {
            element = false;
            this.tagcontent.reset();

            final char firstChar = n;
            this.tagcontent.append(n);
            int index = 0;
            int indexSep = -1;
            while ((ic = this.read()) != -1) {
              n = (char) ic;
              index++;

              if (n == '>') {
                break;
              } // end if

              if (n == ':') {
                indexSep = index;
              }

              if ((n == ' ') || (n == '/') || (n == '\n')
                  || (n == '\r') || (n == '\t') || (n == 160)) {
                break;
              } // end if

              this.tagcontent.append(n);
            } // end while
            final String tag = this.tagcontent.toString();
            this.tagcontent.reset();
            // parse attributes.
            while ((n == ' ') || (n == '\n') || (n == '\r')
                || (n == '\t') || (n == 160)) {
              if ((ic = this.read()) == -1) {
                break;
              }
              n = (char) ic;
            }
            if (n == '/') {
              this.parseEMPTYTag(tag, null, 0, indexSep);
              if ((ic = this.read()) == -1) {
                break;
              }
              continue;
            } else if (n == '>') {
              if (firstChar == '/') {
                this.parseENDTag(tag, indexSep);
              } else {
                this.parseSTARTTag(tag, null, 0, indexSep);
              }
              continue;
            } else {
              // got attributes
              CAttr[] attrs = null;
              int count = 0;
              this.tagcontent.reset();
              boolean has1stLetter = false;
              if (CXmlParser.VALID_XML11_FIRST_LETTERS(n)) {
                this.tagcontent.append(n);
                has1stLetter = true;
              }
              while ((ic = this.read()) != -1) {
                n = (char) ic;
                if ((n == ' ') || (n == '\r') || (n == '\n')
                    || (n == '\t') || (n == 160)) {
                  if (has1stLetter) {
                    final String name = this.tagcontent
                        .toString();
                    if (attrs == null) {
                      attrs = new CAttr[2];
                    }
                    attrs[count++] = new CAttr(name, name,
                        null, null, true);
                    attrs[count - 1].needToDecode = false;
                    this.parseSTARTTag(tag, attrs, count,
                        indexSep);
                    this.tagcontent.reset();
                    has1stLetter = false;
                    continue;
                  }
                  continue;
                } // end if
                else if (CXmlParser
                    .VALID_XML11_FIRST_LETTERS(n)
                    || (n == '=')) {
                  has1stLetter = false;
                  // attribute found
                  if (n != '=') {
                    this.tagcontent.append(n);
                    while ((ic = this.read()) != -1) {
                      n = (char) ic;
                      if ((n == ' ') || (n == '\r')
                          || (n == '\n')
                          || (n == '\t')
                          || (n == '=') || (n == 160)) {
                        break;
                      } // end if
                      this.tagcontent.append(n);
                    }
                  }
                  // bon on a le nom
                  final String name = this.tagcontent
                      .toString();
                  this.tagcontent.reset();
                  if (ic == -1) {
                    if (attrs == null) {
                      attrs = new CAttr[1];
                    }
                    attrs[count++] = new CAttr(name, name,
                        null, null, true);
                    attrs[count - 1].needToDecode = false;
                    this.parseSTARTTag(tag, attrs, count,
                        indexSep);
                    break;
                  }
                  while ((n == ' ') || (n == '\r')
                      || (n == '\n') || (n == '\t')
                      || (n == 160)) {
                    if ((ic = this.read()) == -1) {
                      break;
                    }
                    n = (char) ic;
                  }
                  if (ic == -1) {
                    break;
                  }
                  if (n == '=') {
                    // go to read value
                    if ((ic = this.read()) == -1) {
                      break;
                    }
                    n = (char) ic;
                    while ((n == ' ') || (n == '\r')
                        || (n == '\n') || (n == '\t')
                        || (n == 160)) {
                      if ((ic = this.read()) == -1) {
                        break;
                      }
                      n = (char) ic;
                    }
                    if (ic == -1) {
                      break;
                    }
                    if (n == '\"') {
                      // read till next "
                      if ((ic = this.read()) == -1) {
                        break;
                      }
                      n = (char) ic;
                      boolean needToDecode = false;
                      if (n == '&') {
                        needToDecode = true;
                      }
                      while (n != '\"') {
                        this.tagcontent.append(n);
                        if ((ic = this.read()) == -1) {
                          break;
                        }
                        n = (char) ic;
                        if (n == '&') {
                          needToDecode = true;
                        }
                      }
                      if (attrs == null) {
                        attrs = new CAttr[2];
                      }
                      if (count == attrs.length) {
                        final CAttr[] nattrs = new CAttr[attrs.length * 2 + 2];
                        System.arraycopy(attrs, 0,
                            nattrs, 0, count);
                        attrs = nattrs;
                      }
                      attrs[count++] = new CAttr(name,
                          this.tagcontent.toString(),
                          null, null, true);
                      if (!needToDecode) {
                        attrs[count - 1].needToDecode = false;
                      }
                      this.tagcontent.reset();
                      continue;
                    } else if (n == '\'') {
                      // read till next '
                      if ((ic = this.read()) == -1) {
                        break;
                      }
                      n = (char) ic;
                      boolean needToDecode = false;
                      if (n == '&') {
                        needToDecode = true;
                      }
                      while (n != '\'') {
                        this.tagcontent.append(n);
                        if ((ic = this.read()) == -1) {
                          break;
                        }
                        n = (char) ic;
                        if (n == '&') {
                          needToDecode = true;
                        }
                      }
                      if (attrs == null) {
                        attrs = new CAttr[2];
                      }
                      if (count == attrs.length) {
                        final CAttr[] nattrs = new CAttr[attrs.length * 2 + 2];
                        System.arraycopy(attrs, 0,
                            nattrs, 0, count);
                        attrs = nattrs;
                      }
                      attrs[count++] = new CAttr(name,
                          this.tagcontent.toString(),
                          null, null, true);
                      if (!needToDecode) {
                        attrs[count - 1].needToDecode = false;
                      }
                      this.tagcontent.reset();
                      continue;
                    } else {
                      this.tagcontent.append(n);
                      boolean needToDecode = false;
                      if (n == '&') {
                        needToDecode = true;
                      }
                      while ((ic = this.read()) != -1) {
                        n = (char) ic;
                        if ((n == ' ') || (n == '\r')
                            || (n == '\n')
                            || (n == '\t')
                            || (n == '>')
                            || (n == 160)) {
                          break;
                        }
                        this.tagcontent.append(n);
                        if (n == '&') {
                          needToDecode = true;
                        }
                      }
                      if (attrs == null) {
                        attrs = new CAttr[2];
                      }
                      if (count == attrs.length) {
                        final CAttr[] nattrs = new CAttr[attrs.length * 2 + 2];
                        System.arraycopy(attrs, 0,
                            nattrs, 0, count);
                        attrs = nattrs;
                      }
                      attrs[count++] = new CAttr(name,
                          this.tagcontent.toString(),
                          null, null, true);
                      if (!needToDecode) {
                        attrs[count - 1].needToDecode = false;
                      }
                      this.tagcontent.reset();
                      if (n == '>') {
                        break;
                      }
                      continue;
                    }
                  } else if (n == '/') {
                    if (attrs == null) {
                      attrs = new CAttr[1];
                    }
                    if (count == attrs.length) {
                      final CAttr[] nattrs = new CAttr[attrs.length * 2 + 2];
                      System.arraycopy(attrs, 0, nattrs,
                          0, count);
                      attrs = nattrs;
                    }
                    attrs[count++] = new CAttr(name, name,
                        null, null, true);
                    attrs[count - 1].needToDecode = false;
                    this.parseEMPTYTag(tag, attrs, count,
                        indexSep);
                    ic = this.read();
                    break;
                  } else if (n == '<') {
                    // struct.hasprev = true;
                    this.iprev = n;
                    break;
                  } else {
                    if (attrs == null) {
                      attrs = new CAttr[2];
                    }
                    if (count == attrs.length) {
                      final CAttr[] nattrs = new CAttr[attrs.length * 2 + 2];
                      System.arraycopy(attrs, 0, nattrs,
                          0, count);
                      attrs = nattrs;
                    }
                    attrs[count++] = new CAttr(name, name,
                        null, null, true);
                    // struct.hasprev = true;
                    this.iprev = n;
                    continue;
                  }
                } else if (n == '>') {
                  break;
                } else if (n == '/') {
                  this.parseEMPTYTag(tag, attrs, count,
                      indexSep);
                  ic = this.read();
                  break;
                } else if (n == '<') {
                  // struct.hasprev = true;
                  this.iprev = n;
                  break;
                }
              }
              if (ic == -1) {
                break;
              }
              if (firstChar == '/') {
                this.parseENDTag(tag, indexSep);
              } else if (n != '/') {
                this.parseSTARTTag(tag, attrs, count, indexSep);
              }
              continue;
            }
          } // end if
          else if (pi) {
            pi = false;
            this.tagcontent.reset();

            while ((ic = this.read()) != -1) {
              n = (char) ic;

              if (n == '?') {
                if ((ic = this.read()) == -1) {
                  break;
                } // end if

                n = (char) ic;

                if (n == '>') {
                  break;
                } // end if
                else {
                  this.tagcontent.append('?');
                  this.tagcontent.append(n);
                  continue;
                } // end else
              } // end if
              else if ((n == ' ') || (n == '\n') || (n == '\r')
                  || (n == '\t') || (n == 160)) {
                break;
              } // end else if

              this.tagcontent.append(n);
            } // end while
            final String tag = this.tagcontent.toString();
            this.tagcontent.reset();
            if (n == '>') {
              this.parser.parsePI(tag, "");

              if (ic == -1) {
                break;
              } // end if
            } // end if
            else {
              if (ic == -1) {
                break;
              } // end if

              while ((ic = this.read()) != -1) {
                n = (char) ic;

                if (n == '?') {
                  if ((ic = this.read()) == -1) {
                    break;
                  } // end if

                  n = (char) ic;

                  if (n == '>') {
                    break;
                  } // end if
                  else {
                    this.tagcontent.append('?');
                    this.tagcontent.append(n);
                    continue;
                  } // end else
                } else if (n == '/') {
                  if ((ic = this.read()) == -1) {
                    break;
                  } // end if

                  n = (char) ic;

                  if (n == '>') {
                    break;
                  } // end if
                  else {
                    this.tagcontent.append('/');
                    this.tagcontent.append(n);
                    continue;
                  } // end else
                }

                this.tagcontent.append(n);
              } // end while

              this.parser
                  .parsePI(tag, this.tagcontent.toString());

              if (ic == -1) {
                break;
              } // end if
            } // end else

            continue;
          } // end else if
          else if (comment) {
            comment = false;

            if ((ic = this.read()) == -1) {
              break;
            } // end if

            this.tagcontent.reset();

            while ((ic = this.read()) != -1) {
              n = (char) ic;

              if (n == '-') {
                if ((ic = this.read()) == -1) {
                  break;
                } // end if

                n = (char) ic;

                if (n == '-') {
                  if ((ic = this.read()) == -1) {
                    break;
                  } // end if

                  n = (char) ic;

                  if (n == '>') {
                    break;
                  } // end if
                  else {
                    this.tagcontent.append('-');
                    this.tagcontent.append('-');
                    this.tagcontent.append(n);
                    continue;
                  } // end else
                } // end if
                else {
                  this.tagcontent.append('-');
                  this.tagcontent.append(n);
                  continue;
                } // end else
              } // end if

              this.tagcontent.append(n);
            } // end while

            this.parser.parseComment(this.tagcontent.toString());

            if (ic == -1) {
              break;
            } // end if

            continue;
          } // end else if
          else if (cdata) {
            cdata = false;

            if ((ic = this.read()) == -1) {
              break;
            } // end if

            if ((ic = this.read()) == -1) {
              break;
            } // end if

            if ((ic = this.read()) == -1) {
              break;
            } // end if

            if ((ic = this.read()) == -1) {
              break;
            } // end if

            if ((ic = this.read()) == -1) {
              break;
            } // end if

            if ((ic = this.read()) == -1) {
              break;
            } // end if

            this.tagcontent.reset();

            while ((ic = this.read()) != -1) {
              n = (char) ic;

              if (n == ']') {
                if ((ic = this.read()) == -1) {
                  break;
                } // end if

                n = (char) ic;

                if (n == ']') {
                  if ((ic = this.read()) == -1) {
                    break;
                  } // end if

                  n = (char) ic;

                  if (n == '>') {
                    break;
                  } // end if
                  else {
                    this.tagcontent.append(']');
                    this.tagcontent.append(']');
                    this.tagcontent.append(n);
                    continue;
                  } // end else
                } // end if
                else {
                  this.tagcontent.append(']');
                  this.tagcontent.append(n);
                  continue;
                } // end else
              } // end if

              this.tagcontent.append(n);
            } // end while

            this.parser.parseCDATA(this.tagcontent.toString());

            if (ic == -1) {
              break;
            } // end if

            continue;
          } // end else if
          else if (doctype) {
            doctype = false;

            if ((ic = this.read()) == -1) {
              break;
            } // end if

            if ((ic = this.read()) == -1) {
              break;
            } // end if

            if ((ic = this.read()) == -1) {
              break;
            } // end if

            if ((ic = this.read()) == -1) {
              break;
            } // end if

            if ((ic = this.read()) == -1) {
              break;
            } // end if

            if ((ic = this.read()) == -1) {
              break;
            } // end if

            this.tagcontent.reset();
            this.tagcontent.append("!DOCTYPE");

            while ((ic = this.read()) != -1) {
              n = (char) ic;

              if (n == '>') {
                break;
              } // end if

              if (n == '[') {
                break;
              } // end if

              this.tagcontent.append(n);
            } // end while

            if (n == '[') {
              this.tagcontent.append(n);

              if (ic == -1) {
                break;
              } // end if

              while ((ic = this.read()) != -1) {
                n = (char) ic;

                if (n == ']') {
                  this.tagcontent.append(n);

                  boolean ableToStop = true;

                  while ((ic = this.read()) != -1) {
                    n = (char) ic;

                    if ((n != '>') && (n != ' ')
                        && (n != '\n') && (n != '\r')
                        && (n != ']')) {
                      ableToStop = false;
                      this.tagcontent.append(n);

                      continue;
                    } // end if
                    else if (n == ']') {
                      ableToStop = true;
                    } // end else if
                    else if (ableToStop && (n == '>')) {
                      break;
                    } // end else if

                    this.tagcontent.append(n);
                  } // end while

                  if ((ic == -1) || (n == '>')) {
                    break;
                  } // end if
                } // end if

                this.tagcontent.append(n);
              } // end while
            } // end if

            this.parser.parseDoctype(this.tagcontent.toString());

            if (ic == -1) {
              break;
            } // end if

            continue;
          } // end else if
          else {
            this.tagcontent.reset();

            boolean decode = false;

            if ((n == '&') || (c == '&')) {
              decode = true;
            } // end if

            this.tagcontent.append(c);
            this.tagcontent.append(n);

            while ((ic = this.read()) != -1) {
              n = (char) ic;

              if (n == '<') {
                break;
              } // end if

              this.tagcontent.append(n);

              if (n == '&') {
                decode = true;
              } // end if
            } // end while

            if (n == '<') {
              this.iprev = n;
              // struct.hasprev = true;
            } // end if

            this.parser.parseText(this.tagcontent.toString(),
                decode);

            if (ic == -1) {
              break;
            } // end if

            continue;
          } // end else
        } // end if
        else {
          this.parser.parseText("" + c, false);
        } // end else
      } // end while

      this.parser.documentEnd();
    } // end try
    catch (final RuntimeException e) {
      throw e;
    } // end catch
    catch (final Exception e) {
      throw new RuntimeException(new SAXException(e));
    } // end catch
    finally {
      this.dt = null;
    } // end finally
  } // end parse()

  /**
   * DOCUMENT ME!
   *
   * @param tagName
   *            DOCUMENT ME!
   * @param tagContent
   *            DOCUMENT ME!
   * @param maxoffset
   *            DOCUMENT ME!
   */
  private void parseEMPTYTag(String tagName, final CAttr[] attributes,
      final int count, final int indexSep) {
    if (this.caseInsensitive) {
      tagName = tagName.toLowerCase();
    } // end if

    this.firstTag = false;
    this.parser.parseEmptyTag(tagName, attributes, count, indexSep);
  } // end parseEMPTYTag()

  /**
   * DOCUMENT ME!
   *
   * @param tagName
   *            DOCUMENT ME!
   */
  private void parseENDTag(String tagName, final int indexSep) {
    tagName = tagName.substring(1);

    if (this.caseInsensitive) {
      tagName = tagName.toLowerCase();
    } // end if

    this.parser.parseEndTag(tagName, indexSep);
  } // end parseENDTag()

  /**
   * DOCUMENT ME!
   *
   * @param tagName
   *            DOCUMENT ME!
   * @param tagContent
   *            DOCUMENT ME!
   * @param maxoffset
   *            DOCUMENT ME!
   */
  private void parseSTARTTag(String tagName, final CAttr[] attributes,
      final int count, final int indexSep) {
    if (this.caseInsensitive) {
      tagName = tagName.toLowerCase();
    } // end if

    if (this.firstTag && !this.htmlDocument
        && tagName.equalsIgnoreCase("html")) {
      if (!this.caseInsensitive) {
        tagName = tagName.toLowerCase();
        this.caseInsensitive = true;
      } // end if
      this.htmlDocument = true;
      this.dt = CXmlParser.dtTr;
    } // end if
    else if (this.htmlDocument && tagName.equalsIgnoreCase("frameset")) {
      this.dt = CXmlParser.dtFr;
    } // end else if

    this.firstTag = false;

    if (this.htmlDocument) {
      if (this.canHaveChild(tagName)) {
        this.parser.parseStartTag(tagName, attributes, count, indexSep);
      } // end if
      else {
        this.parser.parseEmptyTag(tagName, attributes, count, indexSep);
      } // end else
    } // end if
    else {
      this.parser.parseStartTag(tagName, attributes, count, indexSep);
    } // end else
  } // end parseSTARTTag()

  /**
   * DOCUMENT ME!
   *
   * @return DOCUMENT ME!
   *
   * @throws Exception
   *             DOCUMENT ME!
   */
  private int read() throws Exception {
    if (this.iprev != -1) {
      final int ret = this.iprev;
      this.iprev = -1;
      return ret;
    } // end if
    else {
      return this.in.read();
    } // end else
  } // end read()

} // end CXmlParser
TOP

Related Classes of org.allcolor.xml.parser.CXmlParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.