Examples of org.w3c.tidy.Tidy.parseDOM()

org.w3c.tidy.Tidy.parseDOM()
Parses InputStream in and returns a DOM Document node. If out is non-null, pretty prints to OutputStream out.

        tidy.setMakeClean(false);
        tidy.setDropFontTags(false);
        tidy.setUpperCaseAttrs(false);
        tidy.setUpperCaseTags(false);
        tidy.setXHTML(true);
        Document document = tidy.parseDOM(in, null);
        removeProcessingInstructions(document);
        // TODO: This is a workaround for empty <li> list items. Try to fix this in the parser itself.
        removeEmptyListItems(document);
        return document;
    }

View Full Code Here

     */
    private static Document tidyDoc(InputStream stream, boolean quiet, boolean showWarnings, boolean report_errors,
            boolean isXML) throws TidyException {
        StringWriter sw = new StringWriter();
        Tidy tidy = makeTidyParser(quiet, showWarnings, isXML, sw);
        Document doc = tidy.parseDOM(stream, null);
        doc.normalize();
        if (tidy.getParseErrors() > 0) {
            if (report_errors) {
                log.error("TidyException: " + sw.toString());
                throw new TidyException(tidy.getParseErrors(),tidy.getParseWarnings());

View Full Code Here

    public HtmlDocument(File file) throws IOException {
        Tidy tidy = new Tidy();
        tidy.setQuiet(true);
        tidy.setShowWarnings(false);
        org.w3c.dom.Document root =
                tidy.parseDOM(new FileInputStream(file), null);
        rawDoc = root.getDocumentElement();
    }




    /**

View Full Code Here

     */
    public HtmlDocument(InputStream is) {
        Tidy tidy = new Tidy();
        tidy.setQuiet(true);
        tidy.setShowWarnings(false);
        org.w3c.dom.Document root = tidy.parseDOM(is, null);
        rawDoc = root.getDocumentElement();
    }




    /**

View Full Code Here

            StringWriter stringWriter = new StringWriter();
            PrintWriter errorWriter = new PrintWriter(stringWriter);
            tidy.setErrout(errorWriter);


            // Extract the document using JTidy and stream it.
            org.w3c.dom.Document doc = tidy.parseDOM(new BufferedInputStream(this.inputSource.getInputStream()), null);


            // FIXME: Jtidy doesn't warn or strip duplicate attributes in same
            // tag; stripping.
            XMLUtils.stripDuplicateAttributes(doc, null);

View Full Code Here

            StringWriter stringWriter = new StringWriter();
            //FIXME ??
            PrintWriter errorWriter = new PrintWriter(stringWriter);
            tidy.setErrout(errorWriter);
            // Extract the document using JTidy and stream it.
            Document doc = tidy.parseDOM(new BufferedInputStream(stream), null);
            errorWriter.flush();
            errorWriter.close();
            return doc;
        } catch (Exception ex) {
            throw new SAXException(ex);

View Full Code Here


            // Extract the document using JTidy and stream it.
            ByteArrayInputStream bais =
                new ByteArrayInputStream(text.getBytes());
            org.w3c.dom.Document doc =
                tidy.parseDOM(new BufferedInputStream(bais), null);


            // FIXME: Jtidy doesn't warn or strip duplicate attributes in same
            // tag; stripping.
            XMLUtils.stripDuplicateAttributes(doc, null);

View Full Code Here

            // Extract the document using JTidy and stream it.


            if (inputSource != null)
                requestStream = this.inputSource.getInputStream();


            org.w3c.dom.Document doc = tidy.parseDOM(new BufferedInputStream(requestStream), null);


            // FIXME: Jtidy doesn't warn or strip duplicate attributes in same
            // tag; stripping.
            XMLUtils.stripDuplicateAttributes(doc, null);

View Full Code Here

    public HtmlDocument(File file) throws IOException {
        Tidy tidy = new Tidy();
        tidy.setQuiet(true);
        tidy.setShowWarnings(false);


        org.w3c.dom.Document root = tidy.parseDOM(new FileInputStream(file), null);
        rawDoc = root.getDocumentElement();
    }


    /**
     * Constructs an <code>HtmlDocument</code> from an {@link java.io.InputStream}.

View Full Code Here

    public HtmlDocument(InputStream is) throws IOException {
        Tidy tidy = new Tidy();
        tidy.setQuiet(true);
        tidy.setShowWarnings(false);


        org.w3c.dom.Document root = tidy.parseDOM(is, null);
        rawDoc = root.getDocumentElement();
    }


    /**
     * Creates a Lucene <code>Document</code> from an {@link java.io.InputStream}.

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.