Package org.apache.cocoon.portal.util

Source Code of org.apache.cocoon.portal.util.HtmlSaxParser$ContentFilter

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cocoon.portal.util;

import java.io.IOException;
import java.io.StringReader;
import java.util.Iterator;
import java.util.Properties;

import org.apache.cocoon.xml.ContentHandlerWrapper;
import org.apache.excalibur.xml.sax.XMLConsumer;
import org.apache.xerces.parsers.AbstractSAXParser;
import org.cyberneko.html.HTMLConfiguration;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;

/**
* This parser uses the nekohtml parser to parse html and generate sax streams.
*
* @version $Id: HtmlSaxParser.java 433543 2006-08-22 06:22:54Z crossley $
*/
public class HtmlSaxParser extends AbstractSAXParser {

    public HtmlSaxParser(Properties properties) {
        super(getConfig(properties));
    }

    protected static HTMLConfiguration getConfig(Properties properties) {
        HTMLConfiguration config = new HTMLConfiguration();
        config.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
        if (properties != null) {
            for (Iterator i = properties.keySet().iterator();i.hasNext();) {
                String name = (String) i.next();
                config.setProperty(name, properties.getProperty(name));
            }
        }
        return config;
    }

    /**
     * Parse html stored in the string.
     */
    public static void parseString(String content, ContentHandler ch)
    throws SAXException {
        final HtmlSaxParser parser = new HtmlSaxParser(null);
        parser.setContentHandler(ch);
        if ( ch instanceof LexicalHandler ) {
            parser.setLexicalHandler((LexicalHandler)ch);
        }
        final InputSource is = new InputSource(new StringReader(content));
        try {
            parser.parse(is);
        } catch (IOException ioe) {
            throw new SAXException("Error during parsing of html markup.", ioe);
        }
    }

    public static XMLConsumer getContentFilter(ContentHandler ch) {
        return new ContentFilter(ch);
    }

    protected static final class ContentFilter extends ContentHandlerWrapper {

        public ContentFilter(ContentHandler ch) {
            this.setContentHandler(ch);
            if ( ch instanceof LexicalHandler ) {
                this.setLexicalHandler((LexicalHandler)ch);
            }
        }

        /**
         * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
         */
        public void endElement(String uri, String loc, String raw) throws SAXException {
            if ( !loc.equals("html") && !loc.equals("body") ) {
                super.endElement(uri, loc, raw);
            }
        }

        /**
         * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
         */
        public void startElement(String uri, String loc, String raw, Attributes a) throws SAXException {
            if ( !loc.equals("html") && !loc.equals("body") ) {
                super.startElement(uri, loc, raw, a);
            }
        }
    }

}
TOP

Related Classes of org.apache.cocoon.portal.util.HtmlSaxParser$ContentFilter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.