Examples of org.apache.html.dom.HTMLDocumentImpl

org.apache.html.dom.HTMLDocumentImpl
Implements an HTML document. Provides access to the top level element in the document, its body and title.
Several methods create new nodes of all basic types (comment, text, element, etc.). These methods create new nodes but do not place them in the document tree. The nodes may be placed in the document tree using {@link org.w3c.dom.Node#appendChild} or {@link org.w3c.dom.Node#insertBefore}, or they may be placed in some other document tree.
Note: <FRAMESET> documents are not supported at the moment, nor are direct document writing ({@link #open}, {@link #write}) and the HTTP attribute methods ({@link #getURL}, {@link #getCookie}). @xerces.internal @version $Revision: 320583 $ $Date: 2005-09-30 16:56:46 -0400 (Fri, 30 Sep 2005) $ @author Assaf Arkin @see org.w3c.dom.html.HTMLDocument

          .newInstance();
      // Create Document Builder
      DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
      viewStateDocument = docBuilder.newDocument();
    } catch (ParserConfigurationException e) {
      viewStateDocument = new HTMLDocumentImpl();
      _log
          .error(
              "Error on create DOM Document by JAXP, use Xerxes implementation. Check JAXP configuration ",
              e);
    }

View Full Code Here

          .newInstance();
      // Create Document Builder
      DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
      viewStateDocument = docBuilder.newDocument();
    } catch (ParserConfigurationException e) {
      viewStateDocument = new HTMLDocumentImpl();
      _log
          .error(
              "Error on create DOM Document by JAXP, use Xerxes implementation. Check JAXP configuration ",
              e);
    }

View Full Code Here

    LOG.debug("Using Tika parser " + parser.getClass().getName()
        + " for mime-type " + mimeType);


    Metadata tikamd = new Metadata();


    HTMLDocumentImpl doc = new HTMLDocumentImpl();
    doc.setErrorChecking(false);
    DocumentFragment root = doc.createDocumentFragment();
    DOMBuilder domhandler = new DOMBuilder(doc, root);
    ParseContext context = new ParseContext();
    tikamd.set(Metadata.CONTENT_TYPE, mimeType);
    try {
      parser.parse(new ByteArrayInputStream(raw), domhandler, tikamd,context);

View Full Code Here

            throws DiscoveryException
    {
        if (DEBUG)
            _log.debug("Parsing HTML data:\n" + htmlData);


        HTMLDocumentImpl doc = this.parseDocument(htmlData);


        NodeList heads = doc.getElementsByTagName("head");
        if (heads.getLength() != 1)
            throw new DiscoveryException(
                    "HTML response must have exactly one HEAD element, "
                            + "found " + heads.getLength() + " : "
                            + heads.toString(),
                    OpenIDException.DISCOVERY_HTML_PARSE_ERROR);


        HTMLHeadElement head = (HTMLHeadElement) doc.getHead();
        NodeList linkElements = head.getElementsByTagName("LINK");
        for (int i = 0, len = linkElements.getLength(); i < len; i++)
        {
            HTMLLinkElement linkElement = (HTMLLinkElement) linkElements.item(i);
            setResult(linkElement.getRel(), linkElement.getHref(), result);

View Full Code Here

     */
    public String getHtmlMeta(String input) throws YadisException
    {
        String xrdsLocation = null;


        HTMLDocumentImpl doc = this.parseDocument(input);
        if (DEBUG)
        {
            try
            {
                _log.debug("document:\n" + OpenID4JavaDOMParser.toXmlString(doc));
            } catch (TransformerException e)
            {
                _log.debug("An exception occurs while transforming the document to string in debugging.", e);
            }
        }


        NodeList heads = doc.getElementsByTagName("head");
        if (heads.getLength() != 1)
            throw new YadisException(
                    "HTML response must have exactly one HEAD element, "
                            + "found " + heads.getLength() + " : "
                            + heads.toString(),
                    OpenIDException.YADIS_HTMLMETA_INVALID_RESPONSE);


        HTMLHeadElement head = (HTMLHeadElement) doc.getHead();
        NodeList metaElements = head.getElementsByTagName("META");
        if (metaElements == null || metaElements.getLength() == 0)
        {
            if (DEBUG)
                _log.debug("No <meta> element found under <html><head>. " +

View Full Code Here

    //


    /** Main. */
    public static void main(String[] argv) throws Exception {
        DOMFragmentParser parser = new DOMFragmentParser();
        HTMLDocument document = new HTMLDocumentImpl();
        for (int i = 0; i < argv.length; i++) {
            DocumentFragment fragment = document.createDocumentFragment();
            parser.parse(argv[i], fragment);
            print(fragment, "");
        }
    } // main(String[])

View Full Code Here

      doTest("<html 9='id'></html>", "<HTML/>");
    }


    private void doTest(final String html, final String expected) throws Exception {
        DOMFragmentParser parser = new DOMFragmentParser();
        HTMLDocument document = new HTMLDocumentImpl();


        DocumentFragment fragment = document.createDocumentFragment();
        InputSource source = new InputSource(new StringReader(html));
        parser.parse(source, fragment);
//        final OutputFormat of = new OutputFormat();
//        of.setOmitXMLDeclaration(true);
//        XMLSerializer s = new XMLSerializer(of);

View Full Code Here

      return parseTagSoup(input);
    else return parseNeko(input);
  }


  private DocumentFragment parseTagSoup(InputSource input) throws Exception {
    HTMLDocumentImpl doc = new HTMLDocumentImpl();
    DocumentFragment frag = doc.createDocumentFragment();
    DOMBuilder builder = new DOMBuilder(doc, frag);
    org.ccil.cowan.tagsoup.Parser reader = new org.ccil.cowan.tagsoup.Parser();
    reader.setContentHandler(builder);
    reader.setFeature(org.ccil.cowan.tagsoup.Parser.ignoreBogonsFeature, true);
    reader.setFeature(org.ccil.cowan.tagsoup.Parser.bogonsEmptyFeature, false);

View Full Code Here

          true);
      parser.setFeature("http://cyberneko.org/html/features/report-errors",
          LOG.isTraceEnabled());
    } catch (SAXException e) {}
    // convert Document to DocumentFragment
    HTMLDocumentImpl doc = new HTMLDocumentImpl();
    doc.setErrorChecking(false);
    DocumentFragment res = doc.createDocumentFragment();
    DocumentFragment frag = doc.createDocumentFragment();
    parser.parse(input, frag);
    res.appendChild(frag);


    try {
      while(true) {
        frag = doc.createDocumentFragment();
        parser.parse(input, frag);
        if (!frag.hasChildNodes()) break;
        if (LOG.isInfoEnabled()) {
          LOG.info(" - new frag, " + frag.getChildNodes().getLength() + " nodes.");
        }

View Full Code Here

    LOG.debug("Using Tika parser " + parser.getClass().getName() + " for mime-type "
        + mimeType);


    Metadata tikamd = new Metadata();


    HTMLDocumentImpl doc = new HTMLDocumentImpl();
    doc.setErrorChecking(false);
    DocumentFragment root = doc.createDocumentFragment();
    DOMBuilder domhandler = new DOMBuilder(doc, root);
    ParseContext context = new ParseContext();
    // to add once available in Tika
    // context.set(HtmlMapper.class, IdentityHtmlMapper.INSTANCE);
    try {

View Full Code Here

0 1 2 3

TOP

Related Classes of org.apache.html.dom.HTMLDocumentImpl

org.ajax4jsf.webapp.nekko.NekkoParser

org.apache.myfaces.tobago.example.test.SeleniumScript

org.apache.nutch.parse.html.HtmlParser

org.apache.nutch.parse.tika.DOMContentUtilsTest

org.apache.nutch.parse.tika.TikaParser

org.apache.xerces.dom.NodeImpl

org.cyberneko.html.DOMFragmentParserTest

org.openid4java.discovery.html.CyberNekoDOMHtmlParser

org.openid4java.discovery.yadis.CyberNekoDOMYadisHtmlParser

org.ryu22e.nico2cal.util.HtmlRemoveUtil

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.