Package org.cyberneko.html.parsers

Examples of org.cyberneko.html.parsers.DOMParser


    // MAIN
    //

    /** Main. */
    public static void main(String[] argv) throws Exception {
        DOMParser parser = new DOMParser();
        for (int i = 0; i < argv.length; i++) {
            parser.parse(argv[i]);
            print(parser.getDocument(), "");
        }
    } // main(String[])
View Full Code Here


    // MAIN
    //

    /** Main. */
    public static void main(String[] argv) throws Exception {
        DOMParser parser = new DOMParser();
        for (int i = 0; i < argv.length; i++) {
            parser.parse(argv[i]);
            print(parser.getDocument(), "");
        }
    } // main(String[])
View Full Code Here

  }

  private Document parseFragment(String source) throws SAXException, IOException {
    InputSource input = new InputSource(new StringReader(source));
    if (attemptFullDocParseFirst(source)) {
      DOMParser parser = new DOMParser();
      // Force parser not to use HTMLDocumentImpl as document implementation otherwise
      // it forces all element names to uppercase.
      parser.setProperty("http://apache.org/xml/properties/dom/document-class-name",
          "org.apache.xerces.dom.DocumentImpl");
      // Dont convert element names to upper/lowercase
      parser.setProperty("http://cyberneko.org/html/properties/names/elems", "default");
      // Preserve case of attributes
      parser.setProperty("http://cyberneko.org/html/properties/names/attrs", "no-change");
      // Record entity references
      parser.setFeature("http://apache.org/xml/features/scanner/notify-char-refs", true);
      parser.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs", true);
      // No need to defer as full DOM is walked later
      parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", false);
      parser.parse(input);
      return parser.getDocument();
    } else {
      Document htmlDoc = documentProvider.createDocument(null, null, null);
      // Workaround for error check failure adding text node to entity ref as a child
      htmlDoc.setStrictErrorChecking(false);
      DOMFragmentParser parser = new DOMFragmentParser();
      parser.setProperty("http://cyberneko.org/html/properties/names/elems", "default");
      parser.setFeature("http://cyberneko.org/html/features/document-fragment", true);
      parser.setProperty("http://cyberneko.org/html/properties/names/attrs", "no-change");
      parser.setFeature("http://apache.org/xml/features/scanner/notify-char-refs", true);
      parser.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs", true);
      DocumentFragment fragment = htmlDoc.createDocumentFragment();
      parser.parse(input, fragment);
      normalizeFragment(htmlDoc, fragment);
      return htmlDoc;
    }
  }
View Full Code Here

                    fPrinter.flush();
                }
               
            }};
           
                DOMParser parser = new DOMParser();
            parser.setFeature("http://cyberneko.org/html/features/balance-tags", false);
            parser.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs", true);
            parser.setProperty("http://cyberneko.org/html/properties/default-encoding", fileEncoding);
            parser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
            parser.setProperty("http://cyberneko.org/html/properties/filters", filters);
                parser.parse(is);
               
                str = "<!DOCTYPE html>" + baos.toString(fileEncoding);
           
          } catch (Exception e) {
            logger.error(e.getMessage(), e);
View Full Code Here

   *             if an IO failure occurs.
   * @throws SAXException
   *             if an exception occurs while parsing the HTML string.
   */
  public static Document asDocument(String html) throws IOException {
    DOMParser domParser = new DOMParser();
    try {
      domParser.setProperty("http://cyberneko.org/html/properties/names/elems", "match");
      domParser.setFeature("http://xml.org/sax/features/namespaces", false);
      domParser.parse(new InputSource(new StringReader(html)));
    } catch (SAXException e) {
      throw new IOException("Error while reading HTML: " + html, e);
    }
    return domParser.getDocument();
  }
View Full Code Here

   *             if an exception occurs while parsing the HTML string.
   * @throws IOException
   *             if an IO failure occurs.
   */
  public static Document getDocumentNoBalance(String html) throws SAXException, IOException {
    DOMParser domParser = new DOMParser();
    domParser.setProperty("http://cyberneko.org/html/properties/names/elems", "match");
    domParser.setFeature("http://cyberneko.org/html/features/balance-tags", false);
    domParser.parse(new InputSource(new StringReader(html)));
    return domParser.getDocument();
  }
View Full Code Here

   *             if an IO failure occurs.
   * @throws SAXException
   *             if an exception occurs while parsing the HTML string.
   */
  public static Document asDocument(String html) throws IOException {
    DOMParser domParser = new DOMParser();
    try {
      domParser.setProperty("http://cyberneko.org/html/properties/names/elems", "match");
      domParser.setFeature("http://xml.org/sax/features/namespaces", false);
      domParser.parse(new InputSource(new StringReader(html)));
    } catch (SAXException e) {
      throw new IOException("Error while reading HTML: " + html, e);
    }
    return domParser.getDocument();
  }
View Full Code Here

   *             if an exception occurs while parsing the HTML string.
   * @throws IOException
   *             if an IO failure occurs.
   */
  public static Document getDocumentNoBalance(String html) throws SAXException, IOException {
    DOMParser domParser = new DOMParser();
    domParser.setProperty("http://cyberneko.org/html/properties/names/elems", "match");
    domParser.setFeature("http://cyberneko.org/html/features/balance-tags", false);
    domParser.parse(new InputSource(new StringReader(html)));
    return domParser.getDocument();
  }
View Full Code Here

   *             if an IO failure occurs.
   * @throws SAXException
   *             if an exception occurs while parsing the HTML string.
   */
  public static Document getDocument(String html) throws SAXException, IOException {
    DOMParser domParser = new DOMParser();
    domParser.setProperty("http://cyberneko.org/html/properties/names/elems", "match");
    domParser.setFeature("http://xml.org/sax/features/namespaces", false);
    domParser.parse(new InputSource(new StringReader(html)));
    return domParser.getDocument();
  }
View Full Code Here

   *             if an exception occurs while parsing the HTML string.
   * @throws IOException
   *             if an IO failure occurs.
   */
  public static Document getDocumentNoBalance(String html) throws SAXException, IOException {
    DOMParser domParser = new DOMParser();
    domParser.setProperty("http://cyberneko.org/html/properties/names/elems", "match");
    domParser.setFeature("http://cyberneko.org/html/features/balance-tags", false);
    domParser.parse(new InputSource(new StringReader(html)));
    return domParser.getDocument();
  }
View Full Code Here

TOP

Related Classes of org.cyberneko.html.parsers.DOMParser

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.