Package org.cyberneko.html

Examples of org.cyberneko.html.HTMLConfiguration

This configuration recognizes the following properties:

For complete usage information, refer to the documentation. See also: HTMLScanner, HTMLTagBalancer, HTMLErrorReporter. Author: Andy Clark. Version: $Id: HTMLConfiguration.java,v 1.9 2005/02/14 03:56:54 andyc Exp $


    public static void main(String[] argv) throws Exception {
        if (argv.length == 0) {
            printUsage();
            System.exit(1);
        }
        XMLParserConfiguration parser = new HTMLConfiguration();
        parser.setFeature(NOTIFY_CHAR_REFS, true);
        parser.setFeature(NOTIFY_HTML_BUILTIN_REFS, true);
        String encoding = "Windows-1252";
        boolean identity = false;
        boolean purify = false;
        for (int i = 0; i < argv.length; i++) {
            String arg = argv[i];
            if (arg.equals("-e")) {
                encoding = argv[++i];
                continue;
            }
            if (arg.equals("-i")) {
                identity = true;
                continue;
            }
            if (arg.equals("-p")) {
                purify = true;
                continue;
            }
            if (arg.equals("-h")) {
                printUsage();
                System.exit(1);
            }
            java.util.Vector filtersVector = new java.util.Vector(2);
            if (identity) {
                filtersVector.addElement(new Identity());
            }
            else if (purify) {
                filtersVector.addElement(new Purifier());
            }
            filtersVector.addElement(new Writer(System.out, encoding));
            XMLDocumentFilter[] filters =
                new XMLDocumentFilter[filtersVector.size()];
            filtersVector.copyInto(filters);
            parser.setProperty(FILTERS, filters);
            parser.parse(new XMLInputSource(null, arg, null));
        }
    } // main(String[])
View Full Code Here


    // Constructors
    //

    /**
     * Default constructor. Creates a new {@link HTMLConfiguration} and hands
     * it to the superclass constructor.
     */
    public HTMLSAXParser() {
        super(new HTMLConfiguration());
    } // <init>()
View Full Code Here

            remover,
            writer,
        };

        // create HTML parser
        XMLParserConfiguration parser = new HTMLConfiguration();
        parser.setProperty("http://cyberneko.org/html/properties/filters", filters);

        // parse documents
        for (int i = 0; i < argv.length; i++) {
            String systemId = argv[i];
            XMLInputSource source = new XMLInputSource(null, systemId, null);
            parser.parse(source);
        }

    } // main(String[])
View Full Code Here

        StringWriter sw = new StringWriter();
        Writer writer = new Writer(sw, null);
        XMLDocumentFilter[] filters = {
                remover, writer,
        };
        XMLParserConfiguration parser = new HTMLConfiguration();
        parser.setProperty("http://cyberneko.org/html/properties/filters", filters);

        XMLInputSource source = new XMLInputSource(null, null, null, new StringReader(sourceText), null);
        try {
            parser.parse(source);
            return sw.getBuffer()
                    .toString()
                    .replaceAll("&apos;", "'")
                    .replaceAll("&gt;", ">")
                    .replaceAll("&lt;", "<")
View Full Code Here

        }
    }

    @Override
    protected void initParser(Ruby runtime) {
        XMLParserConfiguration config = new HTMLConfiguration();
        XMLDocumentFilter removeNSAttrsFilter = new RemoveNSAttrsFilter();
        XMLDocumentFilter elementValidityCheckFilter = new ElementValidityCheckFilter(errorHandler);
        //XMLDocumentFilter[] filters = { removeNSAttrsFilter,  elementValidityCheckFilter};
        XMLDocumentFilter[] filters = { elementValidityCheckFilter};

        config.setErrorHandler(this.errorHandler);

        parser = new NokogiriDomParser(config);

        // see http://nekohtml.sourceforge.net/settings.html for details
        setProperty("http://cyberneko.org/html/properties/default-encoding", java_encoding);
View Full Code Here

TOP

Related Classes of org.cyberneko.html.HTMLConfiguration

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Corporation. Contact coftware#gmail.com.