Package org.cyberneko.html

Examples of org.cyberneko.html.HTMLConfiguration

This configuration recognizes the following properties:

For complete usage information, refer to the documentation. See also: HTMLScanner, HTMLTagBalancer, HTMLErrorReporter. Author: Andy Clark. Version: $Id: HTMLConfiguration.java,v 1.9 2005/02/14 03:56:54 andyc Exp $


    /**
     * Creates the parser on top of the configuration returned by
     * {@link #getConfig()}, which is passed straight to the superclass
     * constructor.
     */
    public HTMLSAXParser() {
        super(getConfig());
    }

    private static HTMLConfiguration getConfig() {
        HTMLConfiguration config = new HTMLConfiguration();
        //config.setFeature("http://cyberneko.org/html/features/augmentations", true);
        config.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
        return config;
    }
View Full Code Here


            // Use URI object to get benefits of proper absolute and relative path resolution for free
            URI baseURI = new URI( baseurl );

            Parser handler = new Parser( baseURI );

            XMLParserConfiguration parser = new HTMLConfiguration();
            parser.setDocumentHandler( handler );
            parser.setFeature( "http://cyberneko.org/html/features/augmentations", true );
            parser.setProperty( "http://cyberneko.org/html/properties/names/elems", "upper" );
            parser.setProperty( "http://cyberneko.org/html/properties/names/attrs", "upper" );
            parser.parse( new XMLInputSource( null, baseurl, baseURI.toString(), stream, "UTF-8" ) );

            return new ArrayList<String>( handler.getLinks() );

        }
        catch ( URISyntaxException e )
View Full Code Here

        public Object createResource() {

            try {

                final HTMLConfiguration config = new HTMLConfiguration();

                config.setFeature("http://xml.org/sax/features/namespaces", false);
                config.setFeature("http://cyberneko.org/html/features/override-doctype", true);
                config.setFeature("http://cyberneko.org/html/features/scanner/cdata-sections", true);

                // Avoids the inclusion of <HTML><BODY>, etc. around template fragments. Tag balancing will only
                // be performed inside the fragments' root nodes.
                config.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);

                config.setProperty("http://cyberneko.org/html/properties/doctype/pubid", "");
                config.setProperty("http://cyberneko.org/html/properties/doctype/sysid", "");
                config.setProperty("http://cyberneko.org/html/properties/names/elems", "match");
                config.setProperty("http://cyberneko.org/html/properties/names/attrs", "no-change");

                return new DOMParser(config);

            } catch(final Exception e) {
                throw new ConfigurationException(
View Full Code Here

        }
    }

    @Override
    protected void initParser(Ruby runtime) {
        XMLParserConfiguration config = new HTMLConfiguration();
        XMLDocumentFilter removeNSAttrsFilter = new RemoveNSAttrsFilter();
        XMLDocumentFilter elementValidityCheckFilter = new ElementValidityCheckFilter(errorHandler);
        //XMLDocumentFilter[] filters = { removeNSAttrsFilter,  elementValidityCheckFilter};
        XMLDocumentFilter[] filters = { elementValidityCheckFilter};

        config.setErrorHandler(this.errorHandler);
        parser = new DOMParser(config);

        // see http://nekohtml.sourceforge.net/settings.html for details
        setProperty("http://cyberneko.org/html/properties/default-encoding", java_encoding);
        setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
View Full Code Here

    }
    sb.append("lib/zazl/zazl.js");
    zazlPath = sb.toString();
    this.encoding = encoding;
    this.configScriptTag = configScriptTag;
    parser = new HTMLConfiguration();
        parser.setFeature(AUGMENTATIONS, true);
        XMLDocumentFilter[] filters = { this, new Identity(), new HTMLWriter(out, this.encoding) };
        parser.setProperty(FILTERS, filters);
  }
View Full Code Here

    private StringBuffer buffer;

    /**
     * Creates the parser, handing a new default-constructed
     * {@link HTMLConfiguration} to the superclass constructor.
     */
    public HTMLParser() {

        super(new HTMLConfiguration());
    }
View Full Code Here

    private StringBuffer buffer;

    /**
     * Creates the parser, handing a new default-constructed
     * {@link HTMLConfiguration} to the superclass constructor.
     */
    public HTMLParser() {

        super(new HTMLConfiguration());
    }
View Full Code Here

   *
   * @return
   * @throws ServletException
   */
  protected HTMLConfiguration getHtmlConfig() {
    HTMLConfiguration _config = new HTMLConfiguration();
    try {
      if (this.getPublicid() != null || this.getSystemid() != null) {
        _config.setFeature(
            "http://cyberneko.org/html/features/insert-doctype",
            true);
        _config.setFeature(
            "http://cyberneko.org/html/features/override-doctype",
            true);
      }
      if (this.getPublicid() != null) {
        _config.setProperty(
            "http://cyberneko.org/html/properties/doctype/pubid",
            getPublicid());

      }
      if (this.getSystemid() != null) {
        _config.setProperty(
            "http://cyberneko.org/html/properties/doctype/sysid",
            getSystemid());

      }
      if (this.getNamespace() != null) {
        _config.setFeature("http://xml.org/sax/features/namespaces",
            true);
        _config
            .setFeature(
                "http://cyberneko.org/html/features/override-namespaces",
                true);
        _config.setFeature(
            "http://cyberneko.org/html/features/insert-namespaces",
            true);
        _config.setProperty(
            "http://cyberneko.org/html/properties/namespaces-uri",
            getNamespace());

      }
      // config
      // .setFeature(
      // "http://cyberneko.org/html/features/balance-tags/ignore-outside-content",
      // true);
      _config
          .setFeature(
              "http://cyberneko.org/html/features/scanner/cdata-sections",
              true);
      _config
          .setFeature(
              "http://cyberneko.org/html/features/scanner/script/strip-comment-delims",
              true);
      _config
          .setFeature(
              "http://cyberneko.org/html/features/scanner/style/strip-comment-delims",
              true);
      _config.setFeature(
          "http://cyberneko.org/html/features/insert-doctype", true);
      _config.setFeature(
          "http://cyberneko.org/html/features/insert-namespaces",
          true);
      // 
      // Set properties
      // http://cyberneko.org/html/features/insert-namespaces
      // _config
      // .setProperty(
      // "http://cyberneko.org/html/properties/default-encoding",
      // encoding);
      _config
          .setProperty(
              "http://cyberneko.org/html/properties/names/elems",
              "lower");
      _config
          .setProperty(
              "http://cyberneko.org/html/properties/names/attrs",
              "lower");
      _config.setProperty("http://cyberneko.org/html/properties/filters",
          _filters);
    } catch (XMLConfigurationException e) {
      // throw new ServletException("error set Neko feature ", e);
    }
    return _config;
View Full Code Here

   *
   * @return
   * @throws ServletException
   */
  protected HTMLConfiguration getHtmlConfig() {
    HTMLConfiguration _config = new HTMLConfiguration();
    try {
      if (this.getPublicid() != null || this.getSystemid() != null) {
        _config.setFeature(
            "http://cyberneko.org/html/features/insert-doctype",
            true);
        _config.setFeature(
            "http://cyberneko.org/html/features/override-doctype",
            true);
      }
      if (this.getPublicid() != null) {
        _config.setProperty(
            "http://cyberneko.org/html/properties/doctype/pubid",
            getPublicid());

      }
      if (this.getSystemid() != null) {
        _config.setProperty(
            "http://cyberneko.org/html/properties/doctype/sysid",
            getSystemid());

      }
      if (this.getNamespace() != null) {
        _config.setFeature("http://xml.org/sax/features/namespaces",
            true);
        _config
            .setFeature(
                "http://cyberneko.org/html/features/override-namespaces",
                true);
        _config.setFeature(
            "http://cyberneko.org/html/features/insert-namespaces",
            true);
        _config.setProperty(
            "http://cyberneko.org/html/properties/namespaces-uri",
            getNamespace());

      }
      // config
      // .setFeature(
      // "http://cyberneko.org/html/features/balance-tags/ignore-outside-content",
      // true);
      _config
          .setFeature(
              "http://cyberneko.org/html/features/scanner/cdata-sections",
              true);
      _config
          .setFeature(
              "http://cyberneko.org/html/features/scanner/script/strip-comment-delims",
              true);
      _config
          .setFeature(
              "http://cyberneko.org/html/features/scanner/style/strip-comment-delims",
              true);
      _config.setFeature(
          "http://cyberneko.org/html/features/insert-doctype", true);
      _config.setFeature(
          "http://cyberneko.org/html/features/insert-namespaces",
          true);
      // 
      // Set properties
      // http://cyberneko.org/html/features/insert-namespaces
      // _config
      // .setProperty(
      // "http://cyberneko.org/html/properties/default-encoding",
      // encoding);
      _config
          .setProperty(
              "http://cyberneko.org/html/properties/names/elems",
              "lower");
      _config
          .setProperty(
              "http://cyberneko.org/html/properties/names/attrs",
              "lower");
      _config.setProperty("http://cyberneko.org/html/properties/filters",
          _filters);
    } catch (XMLConfigurationException e) {
      // throw new ServletException("error set Neko feature ", e);
    }
    return _config;
View Full Code Here

  /**
   * Parse HTML source.
   * @return a document handler containing the parsed source
   */
  private DocumentHandler parseHtmlImpl(String source) throws IOException {
    HTMLConfiguration config = newConfiguration();

    HTMLScanner htmlScanner = new HTMLScanner();
    HTMLTagBalancer tagBalancer = new HTMLTagBalancer();

    DocumentHandler handler = newDocumentHandler(source, htmlScanner);

    if (config.getFeature("http://xml.org/sax/features/namespaces")) {
      NamespaceBinder namespaceBinder = new NamespaceBinder();
      namespaceBinder.setDocumentHandler(handler);
      namespaceBinder.setDocumentSource(tagBalancer);
      namespaceBinder.reset(config);
      tagBalancer.setDocumentHandler(namespaceBinder);
View Full Code Here

TOP

Related Classes of org.cyberneko.html.HTMLConfiguration

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle Inc. Contact coftware#gmail.com.