Examples of org.apache.html.dom.HTMLDocumentImpl

org.apache.html.dom.HTMLDocumentImpl
Implements an HTML document. Provides access to the top level element in the document, its body and title.
Several methods create new nodes of all basic types (comment, text, element, etc.). These methods create new nodes but do not place them in the document tree. The nodes may be placed in the document tree using {@link org.w3c.dom.Node#appendChild} or {@link org.w3c.dom.Node#insertBefore}, or they may be placed in some other document tree.
Note: <FRAMESET> documents are not supported at the moment, nor are direct document writing ({@link #open}, {@link #write}) and HTTP attribute methods ({@link #getURL}, {@link #getCookie}). @xerces.internal @version $Revision: 320583 $ $Date: 2005-09-30 16:56:46 -0400 (Fri, 30 Sep 2005) $ @author Assaf Arkin @see org.w3c.dom.html.HTMLDocument

    LOG.debug("Using Tika parser " + parser.getClass().getName() + " for mime-type "
        + mimeType);


    Metadata tikamd = new Metadata();


    HTMLDocumentImpl doc = new HTMLDocumentImpl();
    doc.setErrorChecking(false);
    DocumentFragment root = doc.createDocumentFragment();
    DOMBuilder domhandler = new DOMBuilder(doc, root);
    ParseContext context = new ParseContext();
    // to add once available in Tika
    // context.set(HtmlMapper.class, IdentityHtmlMapper.INSTANCE);
    try {

View Full Code Here

      return parseTagSoup(input);
    else return parseNeko(input);
  }


  private DocumentFragment parseTagSoup(InputSource input) throws Exception {
    HTMLDocumentImpl doc = new HTMLDocumentImpl();
    DocumentFragment frag = doc.createDocumentFragment();
    DOMBuilder builder = new DOMBuilder(doc, frag);
    org.ccil.cowan.tagsoup.Parser reader = new org.ccil.cowan.tagsoup.Parser();
    reader.setContentHandler(builder);
    reader.setFeature(org.ccil.cowan.tagsoup.Parser.ignoreBogonsFeature, true);
    reader.setFeature(org.ccil.cowan.tagsoup.Parser.bogonsEmptyFeature, false);

View Full Code Here

          true);
      parser.setFeature("http://cyberneko.org/html/features/report-errors",
          LOG.isTraceEnabled());
    } catch (SAXException e) {}
    // convert Document to DocumentFragment
    HTMLDocumentImpl doc = new HTMLDocumentImpl();
    doc.setErrorChecking(false);
    DocumentFragment res = doc.createDocumentFragment();
    DocumentFragment frag = doc.createDocumentFragment();
    parser.parse(input, frag);
    res.appendChild(frag);


    try {
      while(true) {
        frag = doc.createDocumentFragment();
        parser.parse(input, frag);
        if (!frag.hasChildNodes()) break;
        if (LOG.isInfoEnabled()) {
          LOG.info(" - new frag, " + frag.getChildNodes().getLength() + " nodes.");
        }

View Full Code Here

    }
  }


  public SeleniumScript(String scriptUrl, String url) throws IOException, SAXException, XPathExpressionException {
    DOMFragmentParser parser = new DOMFragmentParser();
    HTMLDocument document = new HTMLDocumentImpl();
    DocumentFragment fragment = document.createDocumentFragment();
    try {
      parser.parse(scriptUrl, fragment);
      // not nice, it seems that parse also throws a FileNotFoundException sometimes.
      // XXX I don't know why
      if (fragment.getTextContent().contains("The page was not found!")) {

View Full Code Here

            throws DiscoveryException
    {
        if (DEBUG)
            _log.debug("Parsing HTML data:\n" + htmlData);


        HTMLDocumentImpl doc = this.parseDocument(htmlData);


        NodeList heads = doc.getElementsByTagName("head");
        if (heads.getLength() != 1)
            throw new DiscoveryException(
                    "HTML response must have exactly one HEAD element, "
                            + "found " + heads.getLength() + " : "
                            + heads.toString(),
                    OpenIDException.DISCOVERY_HTML_PARSE_ERROR);


        HTMLHeadElement head = (HTMLHeadElement) doc.getHead();
        NodeList linkElements = head.getElementsByTagName("LINK");
        for (int i = 0, len = linkElements.getLength(); i < len; i++)
        {
            HTMLLinkElement linkElement = (HTMLLinkElement) linkElements.item(i);
            setResult(linkElement.getRel(), linkElement.getHref(), result);

View Full Code Here

     */
    public String getHtmlMeta(String input) throws YadisException
    {
        String xrdsLocation = null;


        HTMLDocumentImpl doc = this.parseDocument(input);
        if (DEBUG)
        {
            try
            {
                _log.debug("document:\n" + OpenID4JavaDOMParser.toXmlString(doc));
            } catch (TransformerException e)
            {
                _log.debug("An exception occurs while transforming the document to string in debugging.", e);
            }
        }


        NodeList heads = doc.getElementsByTagName("head");
        if (heads.getLength() != 1)
            throw new YadisException(
                    "HTML response must have exactly one HEAD element, "
                            + "found " + heads.getLength() + " : "
                            + heads.toString(),
                    OpenIDException.YADIS_HTMLMETA_INVALID_RESPONSE);


        HTMLHeadElement head = (HTMLHeadElement) doc.getHead();
        NodeList metaElements = head.getElementsByTagName("META");
        if (metaElements == null || metaElements.getLength() == 0)
        {
            if (DEBUG)
                _log.debug("No <meta> element found under <html><head>. " +

View Full Code Here

          .newInstance();
      // Create Document Builder
      DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
      viewStateDocument = docBuilder.newDocument();
    } catch (ParserConfigurationException e) {
      viewStateDocument = new HTMLDocumentImpl();
      _log
          .error(
              "Error on create DOM Document by JAXP, use Xerxes implementation. Check JAXP configuration ",
              e);
    }

View Full Code Here

          .newInstance();
      // Create Document Builder
      DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
      viewStateDocument = docBuilder.newDocument();
    } catch (ParserConfigurationException e) {
      viewStateDocument = new HTMLDocumentImpl();
      _log
          .error(
              "Error on create DOM Document by JAXP, use Xerxes implementation. Check JAXP configuration ",
              e);
    }

View Full Code Here

    LOG.debug("Using Tika parser " + parser.getClass().getName() + " for mime-type "
        + mimeType);


    Metadata tikamd = new Metadata();


    HTMLDocumentImpl doc = new HTMLDocumentImpl();
    doc.setErrorChecking(false);
    DocumentFragment root = doc.createDocumentFragment();
    DOMBuilder domhandler = new DOMBuilder(doc, root);
    ParseContext context = new ParseContext();
    // to add once available in Tika
    // context.set(HtmlMapper.class, IdentityHtmlMapper.INSTANCE);
    try {

View Full Code Here

    tikaParser.setConf(conf);
    Parser parser = tikaParser.getTikaConfig().getParser("text/html");
    for (int i = 0; i < testPages.length; i++) {
      Metadata tikamd = new Metadata();


      HTMLDocumentImpl doc = new HTMLDocumentImpl();
      doc.setErrorChecking(false);
      DocumentFragment root = doc.createDocumentFragment();
      DOMBuilder domhandler = new DOMBuilder(doc, root);
      ParseContext context = new ParseContext();
      // to add once available in Tika
      //context.set(HtmlMapper.class, IdentityHtmlMapper.INSTANCE);
      try {

View Full Code Here

0 1 2 3

TOP

Related Classes of org.apache.html.dom.HTMLDocumentImpl

org.ajax4jsf.webapp.nekko.NekkoParser

org.apache.myfaces.tobago.example.test.SeleniumScript

org.apache.nutch.parse.html.HtmlParser

org.apache.nutch.parse.tika.DOMContentUtilsTest

org.apache.nutch.parse.tika.TikaParser

org.apache.xerces.dom.NodeImpl

org.cyberneko.html.DOMFragmentParserTest

org.openid4java.discovery.html.CyberNekoDOMHtmlParser

org.openid4java.discovery.yadis.CyberNekoDOMYadisHtmlParser

org.ryu22e.nico2cal.util.HtmlRemoveUtil

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.