Package org.apache.jetspeed.rewriter.html.neko

Source Code of org.apache.jetspeed.rewriter.html.neko.NekoParserAdaptor

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jetspeed.rewriter.html.neko;

import java.io.Reader;
import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.xerces.xni.parser.XMLDocumentFilter;
import org.apache.xerces.xni.parser.XMLInputSource;

import org.apache.jetspeed.rewriter.ParserAdaptor;
import org.apache.jetspeed.rewriter.Rewriter;
import org.apache.jetspeed.rewriter.RewriterException;

import org.xml.sax.SAXException ;

import org.cyberneko.html.parsers.SAXParser;
import org.cyberneko.html.filters.DefaultFilter;
import org.cyberneko.html.filters.Purifier;


/**
* <p>
* NekoParserAdapter
* </p>
* <p>
* </p>
* @author <a href="mailto:dyoung@phase2systems.com">David L Young</a>
* @version $Id: $
*
*/
public class NekoParserAdaptor implements ParserAdaptor
{
    protected final static Log log = LogFactory.getLog(NekoParserAdaptor.class);
   
    /*
     * Construct a cyberneko HTML parser adaptor
     */
    public NekoParserAdaptor()
    {
        super();
    }
   
    /**
     * <p>
     * parse
     * </p>
     *
     * @see org.apache.jetspeed.rewriter.ParserAdaptor#parse(org.apache.jetspeed.rewriter.Rewriter, java.io.Reader)
     * @param rewriter
     * @param reader
     * @throws RewriterException
     */
    public void parse(Rewriter rewriter, Reader reader)
            throws RewriterException
    {
        // not sure what this means to parse without rewriting
        rewrite(rewriter,reader,null);
    }

    /**
     * <p>
     * rewrite
     * </p>
     *
     * @see org.apache.jetspeed.rewriter.ParserAdaptor#rewrite(org.apache.jetspeed.rewriter.Rewriter, java.io.Reader, java.io.Writer)
     * @param rewriter
     * @param reader
     * @param writer
     * @throws RewriterException
     */
    public void rewrite(Rewriter rewriter, java.io.Reader reader, java.io.Writer writer)
            throws RewriterException
    {
        // use a cyberneko SAXParser
        SAXParser parser = new SAXParser() ;

        // setup filter chain
        XMLDocumentFilter[] filters = {
            new Purifier(),                                                                                  // [1] standard neko purifications (tag balancing, etc)
            new CallbackElementRemover( rewriter ),                                                          // [2] accept / reject tags based on advice from rewriter
            writer != null ? new org.cyberneko.html.filters.Writer( writer, null ) : new DefaultFilter()     // [3] propagate results to specified writer (or do nothing -- Default -- when writer is null)
        };
       
        String filtersPropName = "http://cyberneko.org/html/properties/filters";
  
        try
        {
            parser.setProperty(filtersPropName, filters);
        }
        catch (SAXException e)
        {
            // either no longer supported (SAXNotSupportedException), or no logner recognized (SAXNotRecognizedException)
            log.error(filtersPropName + " is, unexpectedly, no longer defined for the cyberneko HTML parser",e);
            throw new RewriterException("cyberneko parser version not supported",e);
        }

        try
        {
            // parse from reader
            parser.parse(new XMLInputSource( null, null, null, reader, null )) ;
        }
        catch (IOException e)
        {
            String msg = "cyberneko HTML parsing failure";
            log.error(msg,e);
            throw new RewriterException(msg,e);
        }

    }

}
TOP

Related Classes of org.apache.jetspeed.rewriter.html.neko.NekoParserAdaptor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.