Package org.apache.html.dom

Examples of org.apache.html.dom.HTMLDocumentImpl


    LOG.debug("Using Tika parser " + parser.getClass().getName() + " for mime-type "
        + mimeType);

    Metadata tikamd = new Metadata();

    HTMLDocumentImpl doc = new HTMLDocumentImpl();
    doc.setErrorChecking(false);
    DocumentFragment root = doc.createDocumentFragment();
    DOMBuilder domhandler = new DOMBuilder(doc, root);
    ParseContext context = new ParseContext();
    // to add once available in Tika
    // context.set(HtmlMapper.class, IdentityHtmlMapper.INSTANCE);
    try {
View Full Code Here


      return parseTagSoup(input);
    else return parseNeko(input);
  }

  private DocumentFragment parseTagSoup(InputSource input) throws Exception {
    HTMLDocumentImpl doc = new HTMLDocumentImpl();
    DocumentFragment frag = doc.createDocumentFragment();
    DOMBuilder builder = new DOMBuilder(doc, frag);
    org.ccil.cowan.tagsoup.Parser reader = new org.ccil.cowan.tagsoup.Parser();
    reader.setContentHandler(builder);
    reader.setFeature(org.ccil.cowan.tagsoup.Parser.ignoreBogonsFeature, true);
    reader.setFeature(org.ccil.cowan.tagsoup.Parser.bogonsEmptyFeature, false);
View Full Code Here

          true);
      parser.setFeature("http://cyberneko.org/html/features/report-errors",
          LOG.isTraceEnabled());
    } catch (SAXException e) {}
    // convert Document to DocumentFragment
    HTMLDocumentImpl doc = new HTMLDocumentImpl();
    doc.setErrorChecking(false);
    DocumentFragment res = doc.createDocumentFragment();
    DocumentFragment frag = doc.createDocumentFragment();
    parser.parse(input, frag);
    res.appendChild(frag);

    try {
      while(true) {
        frag = doc.createDocumentFragment();
        parser.parse(input, frag);
        if (!frag.hasChildNodes()) break;
        if (LOG.isInfoEnabled()) {
          LOG.info(" - new frag, " + frag.getChildNodes().getLength() + " nodes.");
        }
View Full Code Here

    }
  }

  public SeleniumScript(String scriptUrl, String url) throws IOException, SAXException, XPathExpressionException {
    DOMFragmentParser parser = new DOMFragmentParser();
    HTMLDocument document = new HTMLDocumentImpl();
    DocumentFragment fragment = document.createDocumentFragment();
    try {
      parser.parse(scriptUrl, fragment);
      // not nice, it seems that parse also throws a FileNotFoundException sometimes.
      // XXX I don't know why
      if (fragment.getTextContent().contains("The page was not found!")) {
View Full Code Here

            throws DiscoveryException
    {
        if (DEBUG)
            _log.debug("Parsing HTML data:\n" + htmlData);

        HTMLDocumentImpl doc = this.parseDocument(htmlData);

        NodeList heads = doc.getElementsByTagName("head");
        if (heads.getLength() != 1)
            throw new DiscoveryException(
                    "HTML response must have exactly one HEAD element, "
                            + "found " + heads.getLength() + " : "
                            + heads.toString(),
                    OpenIDException.DISCOVERY_HTML_PARSE_ERROR);

        HTMLHeadElement head = (HTMLHeadElement) doc.getHead();
        NodeList linkElements = head.getElementsByTagName("LINK");
        for (int i = 0, len = linkElements.getLength(); i < len; i++)
        {
            HTMLLinkElement linkElement = (HTMLLinkElement) linkElements.item(i);
            setResult(linkElement.getRel(), linkElement.getHref(), result);
View Full Code Here

     */
    public String getHtmlMeta(String input) throws YadisException
    {
        String xrdsLocation = null;

        HTMLDocumentImpl doc = this.parseDocument(input);
        if (DEBUG)
        {
            try
            {
                _log.debug("document:\n" + OpenID4JavaDOMParser.toXmlString(doc));
            } catch (TransformerException e)
            {
                _log.debug("An exception occurs while transforming the document to string in debugging.", e);
            }
        }

        NodeList heads = doc.getElementsByTagName("head");
        if (heads.getLength() != 1)
            throw new YadisException(
                    "HTML response must have exactly one HEAD element, "
                            + "found " + heads.getLength() + " : "
                            + heads.toString(),
                    OpenIDException.YADIS_HTMLMETA_INVALID_RESPONSE);

        HTMLHeadElement head = (HTMLHeadElement) doc.getHead();
        NodeList metaElements = head.getElementsByTagName("META");
        if (metaElements == null || metaElements.getLength() == 0)
        {
            if (DEBUG)
                _log.debug("No <meta> element found under <html><head>. " +
View Full Code Here

          .newInstance();
      // Create Document Builder
      DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
      viewStateDocument = docBuilder.newDocument();
    } catch (ParserConfigurationException e) {
      viewStateDocument = new HTMLDocumentImpl();
      _log
          .error(
              "Error on create DOM Document by JAXP, use Xerxes implementation. Check JAXP configuration ",
              e);
    }
View Full Code Here

          .newInstance();
      // Create Document Builder
      DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
      viewStateDocument = docBuilder.newDocument();
    } catch (ParserConfigurationException e) {
      viewStateDocument = new HTMLDocumentImpl();
      _log
          .error(
              "Error on create DOM Document by JAXP, use Xerxes implementation. Check JAXP configuration ",
              e);
    }
View Full Code Here

    LOG.debug("Using Tika parser " + parser.getClass().getName() + " for mime-type "
        + mimeType);

    Metadata tikamd = new Metadata();

    HTMLDocumentImpl doc = new HTMLDocumentImpl();
    doc.setErrorChecking(false);
    DocumentFragment root = doc.createDocumentFragment();
    DOMBuilder domhandler = new DOMBuilder(doc, root);
    ParseContext context = new ParseContext();
    // to add once available in Tika
    // context.set(HtmlMapper.class, IdentityHtmlMapper.INSTANCE);
    try {
View Full Code Here

    tikaParser.setConf(conf);
    Parser parser = tikaParser.getTikaConfig().getParser("text/html");
    for (int i = 0; i < testPages.length; i++) {
      Metadata tikamd = new Metadata();

      HTMLDocumentImpl doc = new HTMLDocumentImpl();
      doc.setErrorChecking(false);
      DocumentFragment root = doc.createDocumentFragment();
      DOMBuilder domhandler = new DOMBuilder(doc, root);
      ParseContext context = new ParseContext();
      // to add once available in Tika
      //context.set(HtmlMapper.class, IdentityHtmlMapper.INSTANCE);
      try {
View Full Code Here

TOP

Related Classes of org.apache.html.dom.HTMLDocumentImpl

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.