Package org.outerj.daisy.diff.helper

Source Code of org.outerj.daisy.diff.helper.NekoHtmlParser$RemoveNamespacesHandler

/*
* Copyright 2004 Guy Van den Broeck
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.outerj.daisy.diff.helper;

import java.io.IOException;

import org.cyberneko.html.parsers.SAXParser;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

/**
* Parses HTML files using the Neko HTML parser. Puts all elements and attribute
* names to lowercase, removes all namespaces, produces well-formed XML.
*/
public class NekoHtmlParser {

  public SaxBuffer parse(InputSource is) throws IOException, SAXException {
    SaxBuffer buffer = new SaxBuffer();
    parse(is, buffer);

    return buffer;
  }

  public void parse(InputSource is, ContentHandler consumer)
      throws IOException, SAXException {
    if (is == null)
      throw new NullPointerException("is argument is required.");

    SAXParser parser = new SAXParser();
    parser.setFeature("http://xml.org/sax/features/namespaces", true);
    parser
        .setFeature(
            "http://cyberneko.org/html/features/override-namespaces",
            false);
    parser.setFeature(
        "http://cyberneko.org/html/features/insert-namespaces", false);
    parser
        .setFeature(
            "http://cyberneko.org/html/features/scanner/ignore-specified-charset",
            true);
    parser.setProperty(
        "http://cyberneko.org/html/properties/default-encoding",
        "UTF-8");
    parser.setProperty("http://cyberneko.org/html/properties/names/elems",
        "lower");
    parser.setProperty("http://cyberneko.org/html/properties/names/attrs",
        "lower");

    parser.setContentHandler(new RemoveNamespacesHandler(
        new MergeCharacterEventsHandler(consumer)));
    parser.parse(is);
  }

  /**
   * A ContentHandler that drops all namespace information.
   */
  static class RemoveNamespacesHandler implements ContentHandler {
    private ContentHandler consumer;

    public RemoveNamespacesHandler(ContentHandler consumer) {
      this.consumer = consumer;
    }

    public void endDocument() throws SAXException {
      consumer.endDocument();
    }

    public void startDocument() throws SAXException {
      consumer.startDocument();
    }

    public void characters(char ch[], int start, int length)
        throws SAXException {
      consumer.characters(ch, start, length);
    }

    public void ignorableWhitespace(char ch[], int start, int length)
        throws SAXException {
      consumer.ignorableWhitespace(ch, start, length);
    }

    public void endPrefixMapping(String prefix) throws SAXException {
      // dropped on purpose
    }

    public void skippedEntity(String name) throws SAXException {
      // dropped on purpose
    }

    public void setDocumentLocator(Locator locator) {
      consumer.setDocumentLocator(locator);
    }

    public void processingInstruction(String target, String data)
        throws SAXException {
      // dropped on purpose
    }

    public void startPrefixMapping(String prefix, String uri)
        throws SAXException {
      // dropped on purpose
    }

    public void endElement(String namespaceURI, String localName,
        String qName) throws SAXException {
      consumer.endElement("", localName, localName);
    }

    public void startElement(String namespaceURI, String localName,
        String qName, Attributes atts) throws SAXException {
      AttributesImpl newAtts = new AttributesImpl(atts);
      for (int i = 0; i < atts.getLength(); i++) {
        newAtts.setURI(i, "");
        newAtts.setQName(i, newAtts.getLocalName(i));
      }
      consumer.startElement("", localName, localName, atts);
    }
  }

}
TOP

Related Classes of org.outerj.daisy.diff.helper.NekoHtmlParser$RemoveNamespacesHandler

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.