Package org.cyberneko.html

Examples of org.cyberneko.html.HTMLConfiguration

This configuration recognizes the following properties:

For complete usage information, refer to the documentation. See also: HTMLScanner, HTMLTagBalancer, HTMLErrorReporter. Author: Andy Clark. Version: $Id: HTMLConfiguration.java,v 1.9 2005/02/14 03:56:54 andyc Exp $


    /** The node representing the document. **/
    private Node _documentNode;


    static NekoDOMParser newParser( DocumentAdapter adapter, URL url ) {
        final HTMLConfiguration configuration = new HTMLConfiguration();
        if (!HTMLParserFactory.getHTMLParserListeners().isEmpty() || HTMLParserFactory.isParserWarningsEnabled()) {
            configuration.setErrorHandler( new ErrorHandler( url ) );
            configuration.setFeature( REPORT_ERRORS, true);
        }
        configuration.setFeature( AUGMENTATIONS, true );
        final ScriptFilter javaScriptFilter = new ScriptFilter( configuration );
        configuration.setProperty( FILTERS, new XMLDocumentFilter[] { javaScriptFilter } );
        if (HTMLParserFactory.isPreserveTagCase()) {
            configuration.setProperty( TAG_NAME_CASE, "match" );
            configuration.setProperty( ATTRIBUTE_NAME_CASE, "no-change" );
        }

        try {
            final NekoDOMParser domParser = new NekoDOMParser( configuration, adapter );
            domParser.setFeature( DEFER_NODE_EXPANSION, false );
View Full Code Here


  @Override
  protected Document parseDomImpl(String source) throws GadgetException {
    DocumentHandler handler;

    HTMLConfiguration config = newConfiguration();
    try {
      handler = parseHtmlImpl(source, config, new NormalizingTagBalancer());
    } catch (IOException ioe) {
      return null;
    }
View Full Code Here

  @Override
  protected DocumentFragment parseFragmentImpl(String source) throws GadgetException {
    DocumentHandler handler;

    HTMLConfiguration config = newConfiguration();
    // http://cyberneko.org/html/features/balance-tags/document-fragment
    // deprecated http://cyberneko.org/html/features/document-fragment
    config.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
    config.setProperty("http://cyberneko.org/html/properties/balance-tags/fragment-context-stack",
        new QName[]{new QName(null, "HTML", "HTML", null), new QName(null, "BODY", "BODY", null)});

    try {
      handler = parseHtmlImpl(source, config, new NekoPatchTagBalancer());
    } catch (IOException ioe) {
View Full Code Here

      bodyFirst = headScript;
    }
  }

  protected HTMLConfiguration newConfiguration() {
    HTMLConfiguration config = new HTMLConfiguration();
    // Maintain original case for elements and attributes
    config.setProperty("http://cyberneko.org/html/properties/names/elems", "match");
    config.setProperty("http://cyberneko.org/html/properties/names/attrs", "no-change");
    // Get notified of entity and character references
    config.setFeature("http://apache.org/xml/features/scanner/notify-char-refs", true);
    config.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs", true);
    config.setFeature("http://xml.org/sax/features/namespaces", true);
    return config;
  }
View Full Code Here

   *
   * @return
   * @throws ServletException
   */
  protected HTMLConfiguration getHtmlConfig() {
    HTMLConfiguration _config = new HTMLConfiguration();
    try {
      if (this.getPublicid() != null || this.getSystemid() != null) {
        _config.setFeature(
            "http://cyberneko.org/html/features/insert-doctype",
            true);
        _config.setFeature(
            "http://cyberneko.org/html/features/override-doctype",
            true);
      }
      if (this.getPublicid() != null) {
        _config.setProperty(
            "http://cyberneko.org/html/properties/doctype/pubid",
            getPublicid());

      }
      if (this.getSystemid() != null) {
        _config.setProperty(
            "http://cyberneko.org/html/properties/doctype/sysid",
            getSystemid());

      }
      if (this.getNamespace() != null) {
        _config.setFeature("http://xml.org/sax/features/namespaces",
            true);
        _config
            .setFeature(
                "http://cyberneko.org/html/features/override-namespaces",
                true);
        _config.setFeature(
            "http://cyberneko.org/html/features/insert-namespaces",
            true);
        _config.setProperty(
            "http://cyberneko.org/html/properties/namespaces-uri",
            getNamespace());

      }
      // config
      // .setFeature(
      // "http://cyberneko.org/html/features/balance-tags/ignore-outside-content",
      // true);
      _config
          .setFeature(
              "http://cyberneko.org/html/features/scanner/cdata-sections",
              true);
      _config
          .setFeature(
              "http://cyberneko.org/html/features/scanner/script/strip-comment-delims",
              true);
      _config
          .setFeature(
              "http://cyberneko.org/html/features/scanner/style/strip-comment-delims",
              true);
      _config.setFeature(
          "http://cyberneko.org/html/features/insert-doctype", true);
      _config.setFeature(
          "http://cyberneko.org/html/features/insert-namespaces",
          true);
      // 
      // Set properties
      // http://cyberneko.org/html/features/insert-namespaces
      // _config
      // .setProperty(
      // "http://cyberneko.org/html/properties/default-encoding",
      // encoding);
      _config
          .setProperty(
              "http://cyberneko.org/html/properties/names/elems",
              "lower");
      _config
          .setProperty(
              "http://cyberneko.org/html/properties/names/attrs",
              "lower");
      _config.setProperty("http://cyberneko.org/html/properties/filters",
          _filters);
    } catch (XMLConfigurationException e) {
      // throw new ServletException("error set Neko feature ", e);
    }
    return _config;
View Full Code Here

   *
   * @return
   * @throws ServletException
   */
  protected HTMLConfiguration getHtmlConfig() {
    HTMLConfiguration _config = new HTMLConfiguration();
    try {
      if (this.getPublicid() != null || this.getSystemid() != null) {
        _config.setFeature(
            "http://cyberneko.org/html/features/insert-doctype",
            true);
        _config.setFeature(
            "http://cyberneko.org/html/features/override-doctype",
            true);
      }
      if (this.getPublicid() != null) {
        _config.setProperty(
            "http://cyberneko.org/html/properties/doctype/pubid",
            getPublicid());

      }
      if (this.getSystemid() != null) {
        _config.setProperty(
            "http://cyberneko.org/html/properties/doctype/sysid",
            getSystemid());

      }
      if (this.getNamespace() != null) {
        _config.setFeature("http://xml.org/sax/features/namespaces",
            true);
        _config
            .setFeature(
                "http://cyberneko.org/html/features/override-namespaces",
                true);
        _config.setFeature(
            "http://cyberneko.org/html/features/insert-namespaces",
            true);
        _config.setProperty(
            "http://cyberneko.org/html/properties/namespaces-uri",
            getNamespace());

      }
      // config
      // .setFeature(
      // "http://cyberneko.org/html/features/balance-tags/ignore-outside-content",
      // true);
      _config
          .setFeature(
              "http://cyberneko.org/html/features/scanner/cdata-sections",
              true);
      _config
          .setFeature(
              "http://cyberneko.org/html/features/scanner/script/strip-comment-delims",
              true);
      _config
          .setFeature(
              "http://cyberneko.org/html/features/scanner/style/strip-comment-delims",
              true);
      _config.setFeature(
          "http://cyberneko.org/html/features/insert-doctype", true);
      _config.setFeature(
          "http://cyberneko.org/html/features/insert-namespaces",
          true);
      // 
      // Set properties
      // http://cyberneko.org/html/features/insert-namespaces
      // _config
      // .setProperty(
      // "http://cyberneko.org/html/properties/default-encoding",
      // encoding);
      _config
          .setProperty(
              "http://cyberneko.org/html/properties/names/elems",
              "lower");
      _config
          .setProperty(
              "http://cyberneko.org/html/properties/names/attrs",
              "lower");
      _config.setProperty("http://cyberneko.org/html/properties/filters",
          _filters);
    } catch (XMLConfigurationException e) {
      // throw new ServletException("error set Neko feature ", e);
    }
    return _config;
View Full Code Here

   *
   * @return
   * @throws ServletException
   */
  protected HTMLConfiguration getHtmlConfig() {
    HTMLConfiguration _config = new HTMLConfiguration();
    try {
      if (this.getPublicid() != null || this.getSystemid() != null) {
        _config.setFeature(
            "http://cyberneko.org/html/features/insert-doctype",
            true);
        _config.setFeature(
            "http://cyberneko.org/html/features/override-doctype",
            true);
      }
      if (this.getPublicid() != null) {
        _config.setProperty(
            "http://cyberneko.org/html/properties/doctype/pubid",
            getPublicid());

      }
      if (this.getSystemid() != null) {
        _config.setProperty(
            "http://cyberneko.org/html/properties/doctype/sysid",
            getSystemid());

      }
      if (this.getNamespace() != null) {
        _config.setFeature("http://xml.org/sax/features/namespaces",
            true);
        _config
            .setFeature(
                "http://cyberneko.org/html/features/override-namespaces",
                true);
        _config.setFeature(
            "http://cyberneko.org/html/features/insert-namespaces",
            true);
        _config.setProperty(
            "http://cyberneko.org/html/properties/namespaces-uri",
            getNamespace());

      }
      _config
          .setFeature(
              "http://cyberneko.org/html/features/scanner/cdata-sections",
              true);
      _config
          .setFeature(
              "http://cyberneko.org/html/features/scanner/script/strip-comment-delims",
              true);
      _config
          .setFeature(
              "http://cyberneko.org/html/features/scanner/style/strip-comment-delims",
              true);
      _config.setFeature(
          "http://cyberneko.org/html/features/insert-doctype", true);
      _config.setFeature(
          "http://cyberneko.org/html/features/insert-namespaces",
          true);
      // 
      // Set properties
      // http://cyberneko.org/html/features/insert-namespaces
      // _config
      // .setProperty(
      // "http://cyberneko.org/html/properties/default-encoding",
      // encoding);
      _config
          .setProperty(
              "http://cyberneko.org/html/properties/names/elems",
              "lower");
      _config
          .setProperty(
              "http://cyberneko.org/html/properties/names/attrs",
              "lower");
      _config.setProperty("http://cyberneko.org/html/properties/filters",
          _filters);
       _config
       .setFeature(
       "http://cyberneko.org/html/features/balance-tags/document-fragment",
       true);
    } catch (XMLConfigurationException e) {
      // throw new ServletException("error set Neko feature ", e);
View Full Code Here

    // MAIN
    //

    /** Main. */
    public static void main(String[] argv) throws Exception {
        HTMLConfiguration parser = new HTMLConfiguration();
        parser.setFeature(AUGMENTATIONS, true);
        XMLDocumentFilter[] filters = { new Script(parser), new Identity(), new Writer() };
        parser.setProperty(FILTERS, filters);
        for (int i = 0; i < argv.length; i++) {
            parser.parse(new XMLInputSource(null, argv[i], null));
        }
    } // main(String[])
View Full Code Here

        if (fFileSets.size() == 0) {
            throw new BuildException("must specify at least one fileset");
        }

        // create parser
        XMLParserConfiguration parser = new HTMLConfiguration();

        // parse input files and produce output files
        log("Parsing test files and generating output...");
        File outdir = new File(outputdir);
        int size = fFileSets.size();
        for (int i = 0; i < size; i++) {
            FileSet fileset = (FileSet)fFileSets.elementAt(i);
            DirectoryScanner dirscanner = fileset.getDirectoryScanner(project);
            File indir = dirscanner.getBasedir();
            String[] files = dirscanner.getIncludedFiles();
            for (int j = 0; j < files.length; j++) {
                File infile = new File(indir, files[j]);
                File outfile = new File(outdir, files[j]);
                log("  "+outfile, Project.MSG_VERBOSE);
                OutputStream out = null;
                try {
                    out = new FileOutputStream(outfile);
                    XMLDocumentFilter[] filters = { new Writer(out) };
                    parser.setProperty("http://cyberneko.org/html/properties/filters", filters);
                    parser.parse(new XMLInputSource(null, infile.toString(), null));
                }
                catch (Exception e) {
                    log("  error parsing input file, "+infile);
                    throw new BuildException(e);
                }
View Full Code Here

    // Constructors
    //

    /**
     * Default constructor.
     * <p>
     * Creates a new {@link HTMLConfiguration}, registers this parser's
     * RECOGNIZED_FEATURES and RECOGNIZED_PROPERTIES with it, enables the
     * DOCUMENT_FRAGMENT feature, and installs this parser as the
     * configuration's document handler.
     */
    public DOMFragmentParser() {
        fParserConfiguration = new HTMLConfiguration();
        // Register the parser-specific feature and property identifiers before
        // any of them is set on the configuration.
        fParserConfiguration.addRecognizedFeatures(RECOGNIZED_FEATURES);
        fParserConfiguration.addRecognizedProperties(RECOGNIZED_PROPERTIES);
        // Put the configuration into document-fragment mode.
        fParserConfiguration.setFeature(DOCUMENT_FRAGMENT, true);
        // This parser receives the document events itself.
        // NOTE(review): 'this' escapes before construction completes; presumably
        // safe because the handler is only stored here - confirm.
        fParserConfiguration.setDocumentHandler(this);
    } // <init>()
View Full Code Here

TOP

Related Classes of org.cyberneko.html.HTMLConfiguration

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle, Inc. Contact coftware#gmail.com.