Package org.cyberneko.html

Examples of org.cyberneko.html.HTMLConfiguration

This configuration recognizes the following properties:

For complete usage information, refer to the documentation. @see HTMLScanner @see HTMLTagBalancer @see HTMLErrorReporter @author Andy Clark @version $Id: HTMLConfiguration.java,v 1.9 2005/02/14 03:56:54 andyc Exp $


    // Constructors
    //

    /** Default constructor. */
    public HTMLSAXParser() {
        super(new HTMLConfiguration());
    } // <init>()
View Full Code Here


            remover,
            writer,
        };

        // create HTML parser
        XMLParserConfiguration parser = new HTMLConfiguration();
        parser.setProperty("http://cyberneko.org/html/properties/filters", filters);

        // parse documents
        for (int i = 0; i < argv.length; i++) {
            String systemId = argv[i];
            XMLInputSource source = new XMLInputSource(null, systemId, null);
            parser.parse(source);
        }

    } // main(String[])
View Full Code Here

                    // create filters
                    out = new FileOutputStream(outfile);
                    XMLDocumentFilter[] filters = { new Writer(out) };
                   
                    // create parser
                    XMLParserConfiguration parser = new HTMLConfiguration();

                    // parser settings
                    parser.setProperty("http://cyberneko.org/html/properties/filters", filters);
                    String infilename = infile.toString();
                    File insettings = new File(infilename+".settings");
                    if (insettings.exists()) {
                        BufferedReader settings = new BufferedReader(new FileReader(insettings));
                        String settingline;
                        while ((settingline = settings.readLine()) != null) {
                            StringTokenizer tokenizer = new StringTokenizer(settingline);
                            String type = tokenizer.nextToken();
                            String id = tokenizer.nextToken();
                            String value = tokenizer.nextToken();
                            if (type.equals("feature")) {
                                parser.setFeature(id, value.equals("true"));
                            }
                            else {
                                parser.setProperty(id, value);
                            }
                        }
                        settings.close();
                    }

                    // parse
                    parser.parse(new XMLInputSource(null, infilename, null));
                }
                catch (Exception e) {
                    log("  error parsing input file, "+infile);
                    throw new BuildException(e);
                }
View Full Code Here

    public static void main(String[] argv) throws Exception {
        if (argv.length == 0) {
            printUsage();
            System.exit(1);
        }
        XMLParserConfiguration parser = new HTMLConfiguration();
        parser.setFeature(NOTIFY_CHAR_REFS, true);
        parser.setFeature(NOTIFY_HTML_BUILTIN_REFS, true);
        String iencoding = null;
        String oencoding = "Windows-1252";
        boolean identity = false;
        boolean purify = false;
        for (int i = 0; i < argv.length; i++) {
            String arg = argv[i];
            if (arg.equals("-ie")) {
                iencoding = argv[++i];
                continue;
            }
            if (arg.equals("-e") || arg.equals("-oe")) {
                oencoding = argv[++i];
                continue;
            }
            if (arg.equals("-i")) {
                identity = true;
                continue;
            }
            if (arg.equals("-p")) {
                purify = true;
                continue;
            }
            if (arg.equals("-h")) {
                printUsage();
                System.exit(1);
            }
            java.util.Vector filtersVector = new java.util.Vector(2);
            if (identity) {
                filtersVector.addElement(new Identity());
            }
            else if (purify) {
                filtersVector.addElement(new Purifier());
            }
            filtersVector.addElement(new Writer(System.out, oencoding));
            XMLDocumentFilter[] filters =
                new XMLDocumentFilter[filtersVector.size()];
            filtersVector.copyInto(filters);
            parser.setProperty(FILTERS, filters);
            XMLInputSource source = new XMLInputSource(null, arg, null);
            source.setEncoding(iencoding);
            parser.parse(source);
        }
    } // main(String[])
View Full Code Here

    // Constructors
    //

    /** Default constructor. */
    public DOMFragmentParser() {
        fParserConfiguration = new HTMLConfiguration();
        fParserConfiguration.addRecognizedFeatures(RECOGNIZED_FEATURES);
        fParserConfiguration.addRecognizedProperties(RECOGNIZED_PROPERTIES);
        fParserConfiguration.setFeature(DOCUMENT_FRAGMENT, true);
        fParserConfiguration.setDocumentHandler(this);
    } // <init>()
View Full Code Here

    // Constructors
    //

    /** Default constructor. */
    public DOMParser() {
        super(new HTMLConfiguration());
        /*** extending DOMParser ***/
        try {
            setProperty("http://apache.org/xml/properties/dom/document-class-name",
                                       "org.apache.html.dom.HTMLDocumentImpl");
        }
View Full Code Here

    // Constructors
    //

    /** Default constructor. */
    public SAXParser() {
        super(new HTMLConfiguration());
    } // <init>()
View Full Code Here

  @Override
  protected Document parseDomImpl(String source) throws GadgetException {
    DocumentHandler handler;

    HTMLConfiguration config = newConfiguration();
    try {
      handler = parseHtmlImpl(source, config, new NormalizingTagBalancer());
    } catch (IOException ioe) {
      return null;
    }
View Full Code Here

  @Override
  protected DocumentFragment parseFragmentImpl(String source) throws GadgetException {
    DocumentHandler handler;

    HTMLConfiguration config = newConfiguration();
    // http://cyberneko.org/html/features/balance-tags/document-fragment
    // deprecated http://cyberneko.org/html/features/document-fragment
    config.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
    config.setProperty("http://cyberneko.org/html/properties/balance-tags/fragment-context-stack",
        new QName[]{new QName(null, "HTML", "HTML", null), new QName(null, "BODY", "BODY", null)});

    try {
      handler = parseHtmlImpl(source, config, new NekoPatchTagBalancer());
    } catch (IOException ioe) {
View Full Code Here

      bodyFirst = headScript;
    }
  }

  protected HTMLConfiguration newConfiguration() {
    HTMLConfiguration config = new HTMLConfiguration();
    // Maintain original case for elements and attributes
    config.setProperty("http://cyberneko.org/html/properties/names/elems", "match");
    config.setProperty("http://cyberneko.org/html/properties/names/attrs", "no-change");
    // Get notified of entity and character references
    config.setFeature("http://apache.org/xml/features/scanner/notify-char-refs", true);
    config.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs", true);
    config.setFeature("http://xml.org/sax/features/namespaces", true);
    return config;
  }
View Full Code Here

TOP

Related Classes of org.cyberneko.html.HTMLConfiguration

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.