Examples of HTMLScanner

This component recognizes the following properties:

@see HTMLElements
@see HTMLEntities
@author Andy Clark
@author Marc Guillemot
@author Ahmed Ashour
@version $Id: HTMLScanner.java,v 1.19 2005/06/14 05:52:37 andyc Exp $
  • org.htmlparser.scanners.HtmlScanner

  • Examples of HTTP.HTMLScanner

      public int doStartTag() throws JspException
      {
        ServletRequest request = pageContext.getRequest();
        // DavidQ - Breaking out individual variables to make debugging easier.
        // DavidQ - Begin
        HTMLScanner htmlScanner = (HTMLScanner) request.getAttribute(HTML_SCANNER_ATTR);
        ExecContext context = (ExecContext) request.getAttribute(EXEC_CONTEXT_ATTR);
        this.testResult = htmlScanner.getCondition(this.getTagHandler(), this.getConditionMethod(), context, this.parameterList);
        // DavidQ - End

        return EVAL_BODY_INCLUDE;
      }
    View Full Code Here

    Examples of HTTP.HTMLScanner

      public int doStartTag() throws JspException
      {
        ServletRequest request = pageContext.getRequest();
        // DavidQ - Breaking out individual variables to make debugging easier.
        // DavidQ - Begin
        HTMLScanner htmlScanner = (HTMLScanner) request.getAttribute(HTML_SCANNER_ATTR);
        ExecContext context = (ExecContext) request.getAttribute(EXEC_CONTEXT_ATTR);
        this.testResult = htmlScanner.getCondition(this.getTagHandler(), this.getConditionMethod(), context, this.parameterList);
        // DavidQ - End

        return EVAL_BODY_INCLUDE;
      }
    View Full Code Here

    Examples of org.cyberneko.html.HTMLScanner

       * @return a document handler containing the parsed source
       */
      private DocumentHandler parseHtmlImpl(String source) throws IOException {
        // Scans `source` with a freshly built HTMLScanner -> HTMLTagBalancer chain and
        // returns the DocumentHandler that sits at the end of that chain.
        HTMLConfiguration config = newConfiguration();

        HTMLScanner htmlScanner = new HTMLScanner();
        HTMLTagBalancer tagBalancer = new HTMLTagBalancer();

        // The handler is created with both the raw source text and the scanner.
        DocumentHandler handler = newDocumentHandler(source, htmlScanner);

        // With the SAX "namespaces" feature enabled, a NamespaceBinder is placed between
        // the tag balancer and the handler; otherwise the balancer feeds the handler
        // directly.
        if (config.getFeature("http://xml.org/sax/features/namespaces")) {
          NamespaceBinder namespaceBinder = new NamespaceBinder();
          namespaceBinder.setDocumentHandler(handler);
          namespaceBinder.setDocumentSource(tagBalancer);
          namespaceBinder.reset(config);
          tagBalancer.setDocumentHandler(namespaceBinder);
        } else {
          tagBalancer.setDocumentHandler(handler);
        }

        // Link scanner and balancer in both directions: the balancer's source is the
        // scanner, and the scanner's output goes to the balancer.
        tagBalancer.setDocumentSource(htmlScanner);
        htmlScanner.setDocumentHandler(tagBalancer);

        // Both components are reset against the same configuration before any input
        // is supplied.
        tagBalancer.reset(config);
        htmlScanner.reset(config);

        // All three constructor arguments are null; the content comes from the
        // in-memory reader set below, with the encoding declared as UTF-8.
        XMLInputSource inputSource = new XMLInputSource(null, null, null);
        inputSource.setEncoding("UTF-8");
        inputSource.setCharacterStream(new StringReader(source));
        htmlScanner.setInputSource(inputSource);
        // NOTE(review): `true` presumably requests a complete scan in one call -
        // confirm against the scanner's scanDocument(boolean) contract.
        htmlScanner.scanDocument(true);
        return handler;
      }
    View Full Code Here

    Examples of org.cyberneko.html.HTMLScanner

       */
      private DocumentHandler parseHtmlImpl(String source, HTMLConfiguration config,
          NormalizingTagBalancer tagBalancer)
          throws IOException {
        // Scans `source` with a new HTMLScanner routed through the caller-supplied tag
        // balancer and a NamespaceBinder, and returns the DocumentHandler at the end
        // of that chain.

        HTMLScanner htmlScanner = new HTMLScanner();
        // Hand the balancer a reference to the scanner it will work with.
        tagBalancer.setScanner(htmlScanner);

        DocumentHandler handler = newDocumentHandler(source);

        // The namespace binder is always installed here, between balancer and handler.
        NamespaceBinder namespaceBinder = new NamespaceBinder();
        namespaceBinder.setDocumentHandler(handler);
        namespaceBinder.setDocumentSource(tagBalancer);
        namespaceBinder.reset(config);
        tagBalancer.setDocumentHandler(namespaceBinder);

        // Event flow wired in this method:
        //   scanner -> tag balancer -> namespace binder -> handler.
        // NOTE(review): an earlier comment described the order as
        // "Scanner -> OSMLFilter -> Tag Balancer", but no OSMLFilter is set up in this
        // method; confirm whether one is installed elsewhere (e.g. via setScanner).
        tagBalancer.setDocumentSource(htmlScanner);
        htmlScanner.setDocumentHandler(tagBalancer);

        // Both components are reset against the same configuration before any input
        // is supplied.
        tagBalancer.reset(config);
        htmlScanner.reset(config);

        // All three constructor arguments are null; the content comes from the
        // in-memory reader set below, with the encoding declared as UTF-8.
        XMLInputSource inputSource = new XMLInputSource(null, null, null);
        inputSource.setEncoding("UTF-8");
        inputSource.setCharacterStream(new StringReader(source));
        htmlScanner.setInputSource(inputSource);
        // NOTE(review): `true` presumably requests a complete scan in one call -
        // confirm against the scanner's scanDocument(boolean) contract.
        htmlScanner.scanDocument(true);
        return handler;
      }
    View Full Code Here

    Examples of org.cyberneko.html.HTMLScanner

       */
      private DocumentHandler parseHtmlImpl(String source, HTMLConfiguration config,
          NormalizingTagBalancer tagBalancer)
          throws IOException {
        // Scans `source` with a new HTMLScanner routed through the caller-supplied tag
        // balancer and a NamespaceBinder, and returns the DocumentHandler at the end
        // of that chain.

        HTMLScanner htmlScanner = new HTMLScanner();
        // Hand the balancer a reference to the scanner it will work with.
        tagBalancer.setScanner(htmlScanner);

        DocumentHandler handler = newDocumentHandler(source);

        // The namespace binder is always installed here, between balancer and handler.
        NamespaceBinder namespaceBinder = new NamespaceBinder();
        namespaceBinder.setDocumentHandler(handler);
        namespaceBinder.setDocumentSource(tagBalancer);
        namespaceBinder.reset(config);
        tagBalancer.setDocumentHandler(namespaceBinder);

        // Event flow wired in this method:
        //   scanner -> tag balancer -> namespace binder -> handler.
        // NOTE(review): an earlier comment described the order as
        // "Scanner -> OSMLFilter -> Tag Balancer", but no OSMLFilter is set up in this
        // method; confirm whether one is installed elsewhere (e.g. via setScanner).
        tagBalancer.setDocumentSource(htmlScanner);
        htmlScanner.setDocumentHandler(tagBalancer);

        // Both components are reset against the same configuration before any input
        // is supplied.
        tagBalancer.reset(config);
        htmlScanner.reset(config);

        // All three constructor arguments are null; the content comes from the
        // in-memory reader set below, with the encoding declared as UTF-8.
        XMLInputSource inputSource = new XMLInputSource(null, null, null);
        inputSource.setEncoding("UTF-8");
        inputSource.setCharacterStream(new StringReader(source));
        htmlScanner.setInputSource(inputSource);
        // NOTE(review): `true` presumably requests a complete scan in one call -
        // confirm against the scanner's scanDocument(boolean) contract.
        htmlScanner.scanDocument(true);
        return handler;
      }
    View Full Code Here

    Examples of org.cyberneko.html.HTMLScanner

        this.documentFactory = documentFactory;
      }

      @Override
      protected Document parseDomImpl(String source) {
        HTMLScanner htmlScanner = new HTMLScanner();
        HTMLTagBalancer tagBalancer = new HTMLTagBalancer();
        DocumentHandler handler = new DocumentHandler(source);
        tagBalancer.setDocumentHandler(handler);
        htmlScanner.setDocumentHandler(tagBalancer);

        HTMLConfiguration config = new HTMLConfiguration();
        // Maintain original case for elements and attributes
        config.setProperty("http://cyberneko.org/html/properties/names/elems", "match");
        config.setProperty("http://cyberneko.org/html/properties/names/attrs", "no-change");
        // Parse as fragment.
        config.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
        // Get notified of entity and character references
        config.setFeature("http://apache.org/xml/features/scanner/notify-char-refs", true);
        config.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs", true);
        tagBalancer.reset(config);
        htmlScanner.reset(config);
        XMLInputSource inputSource = new XMLInputSource(null, null, null);
        inputSource.setEncoding("UTF-8");
        inputSource.setCharacterStream(new StringReader(source));
        try {
          htmlScanner.setInputSource(inputSource);
          htmlScanner.scanDocument(true);
          Document document = handler.getDocument();
          DocumentFragment fragment = handler.getFragment();
          normalizeFragment(document, fragment);
          HtmlSerializer.attach(document, new NekoSerializer(), source);
          return document;
    View Full Code Here

    Examples of org.cyberneko.html.HTMLScanner

       */
      private DocumentHandler parseHtmlImpl(String source, HTMLConfiguration config,
          NormalizingTagBalancer tagBalancer)
          throws IOException {
        // Scans `source` with a new HTMLScanner routed through the caller-supplied tag
        // balancer and a NamespaceBinder, and returns the DocumentHandler at the end
        // of that chain.

        HTMLScanner htmlScanner = new HTMLScanner();
        // Hand the balancer a reference to the scanner it will work with.
        tagBalancer.setScanner(htmlScanner);

        DocumentHandler handler = newDocumentHandler(source);

        // The namespace binder is always installed here, between balancer and handler.
        NamespaceBinder namespaceBinder = new NamespaceBinder();
        namespaceBinder.setDocumentHandler(handler);
        namespaceBinder.setDocumentSource(tagBalancer);
        namespaceBinder.reset(config);
        tagBalancer.setDocumentHandler(namespaceBinder);

        // Event flow wired in this method:
        //   scanner -> tag balancer -> namespace binder -> handler.
        // NOTE(review): an earlier comment described the order as
        // "Scanner -> OSMLFilter -> Tag Balancer", but no OSMLFilter is set up in this
        // method; confirm whether one is installed elsewhere (e.g. via setScanner).
        tagBalancer.setDocumentSource(htmlScanner);
        htmlScanner.setDocumentHandler(tagBalancer);

        // Both components are reset against the same configuration before any input
        // is supplied.
        tagBalancer.reset(config);
        htmlScanner.reset(config);

        // All three constructor arguments are null; the content comes from the
        // in-memory reader set below, with the encoding declared as UTF-8.
        XMLInputSource inputSource = new XMLInputSource(null, null, null);
        inputSource.setEncoding("UTF-8");
        inputSource.setCharacterStream(new StringReader(source));
        htmlScanner.setInputSource(inputSource);
        // NOTE(review): `true` presumably requests a complete scan in one call -
        // confirm against the scanner's scanDocument(boolean) contract.
        htmlScanner.scanDocument(true);
        return handler;
      }
    View Full Code Here

    Examples of org.htmlparser.scanners.HtmlScanner

                    + "  <body>"
                    + "    Some data"
                    + "  </body>"
                    + "</html>");
            parser.addScanner(new TitleScanner(""));
            parser.addScanner(new HtmlScanner());
            parseAndAssertNodeCount(1);
            assertType("html tag", Html.class, node[0]);
            Html html = (Html) node[0];
            NodeList nodeList = new NodeList();
            html.collectInto(nodeList, TitleTag.class);
    View Full Code Here

    Examples of org.htmlparser.scanners.HtmlScanner

         * with methods to access the body and the head.
         */
        public void registerDomScanners()
        {
            // Calls registerScanners() first, then adds the html, body and head
            // scanners on top, in that order.
            registerScanners();
            addScanner(new HtmlScanner());
            addScanner(new BodyScanner());
            addScanner(new HeadScanner());
        }
    View Full Code Here

    Examples of org.htmlparser.scanners.HtmlScanner

      public void testScan() throws Exception {
        createParser("<html>" + "  <head>" + "    <title>Some Title</title>" + "  </head>" + "  <body>" + "    Some data"
            + "  </body>" + "</html>");
        parser.addScanner(new TitleScanner(""));
        parser.addScanner(new HtmlScanner());
        parseAndAssertNodeCount(1);
        assertType("html tag", Html.class, node[0]);
        Html html = (Html) node[0];
        NodeList nodeList = new NodeList();
        html.collectInto(nodeList, TitleTag.class);
    View Full Code Here
    TOP
    Copyright © 2018 www.massapi.com. All rights reserved.
    All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.