Package org.cyberneko.html

Examples of org.cyberneko.html.HTMLConfiguration

This configuration recognizes the following properties:

For complete usage information, refer to the documentation.

@see HTMLScanner
@see HTMLTagBalancer
@see HTMLErrorReporter
@author Andy Clark
@version $Id: HTMLConfiguration.java,v 1.9 2005/02/14 03:56:54 andyc Exp $


    /**
     * Creates a parser whose configuration is produced by
     * {@code getConfig(properties)} and handed to the superclass constructor.
     *
     * @param properties configuration entries forwarded to {@code getConfig};
     *                   {@code getConfig} checks for {@code null} before reading it
     */
    public HtmlSaxParser(Properties properties) {
        super(getConfig(properties));
    }

    protected static HTMLConfiguration getConfig(Properties properties) {
        HTMLConfiguration config = new HTMLConfiguration();
        config.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
        if (properties != null) {
            for (Iterator i = properties.keySet().iterator();i.hasNext();) {
                String name = (String) i.next();
                config.setProperty(name, properties.getProperty(name));
            }
        }
        return config;
    }
View Full Code Here


        /**
         * Creates a parser whose configuration is produced by
         * {@code getConfig(properties)} and handed to the superclass constructor.
         *
         * @param properties configuration entries forwarded to {@code getConfig};
         *                   {@code getConfig} checks for {@code null} before reading it
         */
        public HtmlSaxParser(Properties properties) {
            super(getConfig(properties));
        }
   
        private static HTMLConfiguration getConfig(Properties properties) {
            HTMLConfiguration config = new HTMLConfiguration();
            config.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
            if (properties != null) {
                for (Iterator i = properties.keySet().iterator();i.hasNext();) {
                    String name = (String) i.next();
                    config.setProperty(name, properties.getProperty(name));
                }
            }
            return config;
        }
View Full Code Here

    HTMLTagBalancer tagBalancer = new HTMLTagBalancer();
    DocumentHandler handler = new DocumentHandler(source);
    tagBalancer.setDocumentHandler(handler);
    htmlScanner.setDocumentHandler(tagBalancer);

    HTMLConfiguration config = new HTMLConfiguration();
    // Maintain original case for elements and attributes
    config.setProperty("http://cyberneko.org/html/properties/names/elems", "match");
    config.setProperty("http://cyberneko.org/html/properties/names/attrs", "no-change");
    // Parse as fragment.
    config.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
    // Get notified of entity and character references
    config.setFeature("http://apache.org/xml/features/scanner/notify-char-refs", true);
    config.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs", true);
    tagBalancer.reset(config);
    htmlScanner.reset(config);
    XMLInputSource inputSource = new XMLInputSource(null, null, null);
    inputSource.setEncoding("UTF-8");
    inputSource.setCharacterStream(new StringReader(source));
View Full Code Here

        /**
         * Creates a parser whose configuration is produced by
         * {@code getConfig(properties)} and handed to the superclass constructor.
         *
         * @param properties configuration entries forwarded to {@code getConfig};
         *                   {@code getConfig} checks for {@code null} before reading it
         */
        public HtmlSaxParser(Properties properties) {
            super(getConfig(properties));
        }
   
        private static HTMLConfiguration getConfig(Properties properties) {
            HTMLConfiguration config = new HTMLConfiguration();
            config.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
            if (properties != null) {
                for (Iterator i = properties.keySet().iterator();i.hasNext();) {
                    String name = (String) i.next();
                    config.setProperty(name, properties.getProperty(name));
                }
            }
            return config;
        }
View Full Code Here

    /**
     * Creates a parser whose configuration is produced by
     * {@code getConfig(properties)} and handed to the superclass constructor.
     *
     * @param properties configuration entries forwarded to {@code getConfig};
     *                   {@code getConfig} checks for {@code null} before reading it
     */
    public HtmlSaxParser(Properties properties) {
        super(getConfig(properties));
    }

    protected static HTMLConfiguration getConfig(Properties properties) {
        HTMLConfiguration config = new HTMLConfiguration();
        config.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
        if (properties != null) {
            for (Iterator i = properties.keySet().iterator();i.hasNext();) {
                String name = (String) i.next();
                config.setProperty(name, properties.getProperty(name));
            }
        }
        return config;
    }
View Full Code Here

        /**
         * Creates a parser whose configuration is produced by
         * {@code getConfig(properties)} and handed to the superclass constructor.
         *
         * @param properties configuration entries forwarded to {@code getConfig};
         *                   {@code getConfig} checks for {@code null} before reading it
         */
        public HtmlSaxParser(Properties properties) {
            super(getConfig(properties));
        }
   
        private static HTMLConfiguration getConfig(Properties properties) {
            HTMLConfiguration config = new HTMLConfiguration();
            config.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
            if (properties != null) {
                for (Iterator i = properties.keySet().iterator();i.hasNext();) {
                    String name = (String) i.next();
                    if (name.indexOf("/features/") > -1) {
                        config.setFeature(name, Boolean.getBoolean(properties.getProperty(name)));
                    } else if (name.indexOf("/properties/") > -1) {
                        config.setProperty(name, properties.getProperty(name));
                    }
                }
            }
            return config;
        }
View Full Code Here

        /**
         * Creates a parser whose configuration is produced by
         * {@code getConfig(properties)} and handed to the superclass constructor.
         *
         * @param properties configuration entries forwarded to {@code getConfig};
         *                   {@code getConfig} checks for {@code null} before reading it
         */
        public HtmlSaxParser(Properties properties) {
            super(getConfig(properties));
        }

        private static HTMLConfiguration getConfig(Properties properties) {
            HTMLConfiguration config = new HTMLConfiguration();
            config.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
            if (properties != null) {
                for (Iterator i = properties.keySet().iterator();i.hasNext();) {
                    String name = (String) i.next();
                    if (name.indexOf("/features/") > -1) {
                        config.setFeature(name, Boolean.getBoolean(properties.getProperty(name)));
                    } else if (name.indexOf("/properties/") > -1) {
                        config.setProperty(name, properties.getProperty(name));
                    }
                }
            }
            return config;
        }
View Full Code Here

    /** The node representing the document. **/
    private Node _documentNode;


    static NekoDOMParser newParser( DocumentAdapter adapter, URL url ) {
        final HTMLConfiguration configuration = new HTMLConfiguration();
        if (!HTMLParserFactory.getHTMLParserListeners().isEmpty() || HTMLParserFactory.isParserWarningsEnabled()) {
            configuration.setErrorHandler( new ErrorHandler( url ) );
            configuration.setFeature( REPORT_ERRORS, true);
        }
        configuration.setFeature( AUGMENTATIONS, true );
        final ScriptFilter javaScriptFilter = new ScriptFilter( configuration );
        configuration.setProperty( FILTERS, new XMLDocumentFilter[] { javaScriptFilter } );
        if (HTMLParserFactory.isPreserveTagCase()) {
            configuration.setProperty( TAG_NAME_CASE, "match" );
            configuration.setProperty( ATTRIBUTE_NAME_CASE, "no-change" );
        }

        try {
            final NekoDOMParser domParser = new NekoDOMParser( configuration, adapter );
            domParser.setFeature( DEFER_NODE_EXPANSION, false );
View Full Code Here

    private StringBuffer buffer;

    /**
     * Creates a parser backed by a freshly constructed {@link HTMLConfiguration}
     * with no additional settings applied here.
     */
    public HTMLParser() {

        super(new HTMLConfiguration());
    }
View Full Code Here

         */
        private static XMLParserConfiguration createConfiguration(final WebClient webClient) {
            final BrowserVersion browserVersion = webClient.getBrowserVersion();
            // for IE we need a special scanner that will be able to understand conditional comments
            if (browserVersion.isIE()) {
                return new HTMLConfiguration() {
                    @Override
                    protected HTMLScanner createDocumentScanner() {
                        return new HTMLScannerForIE(browserVersion);
                    }
                };
            }
            return new HTMLConfiguration();
        }
View Full Code Here

TOP

Related Classes of org.cyberneko.html.HTMLConfiguration

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.