Package org.w3c.tidy

Examples of org.w3c.tidy.Tidy.parseDOM()


        tidy.setMakeClean(false);
        tidy.setDropFontTags(false);
        tidy.setUpperCaseAttrs(false);
        tidy.setUpperCaseTags(false);
        tidy.setXHTML(true);
        Document document = tidy.parseDOM(in, null);
        removeProcessingInstructions(document);
        // TODO: This is a workaround for empty <li> list items. Try to fix this in the parser itself.
        removeEmptyListItems(document);
        return document;
    }
View Full Code Here


     */
    private static Document tidyDoc(InputStream stream, boolean quiet, boolean showWarnings, boolean report_errors,
            boolean isXML) throws TidyException {
        StringWriter sw = new StringWriter();
        Tidy tidy = makeTidyParser(quiet, showWarnings, isXML, sw);
        Document doc = tidy.parseDOM(stream, null);
        doc.normalize();
        if (tidy.getParseErrors() > 0) {
            if (report_errors) {
                log.error("TidyException: " + sw.toString());
                throw new TidyException(tidy.getParseErrors(),tidy.getParseWarnings());
View Full Code Here

    public HtmlDocument(File file) throws IOException {
        Tidy tidy = new Tidy();
        tidy.setQuiet(true);
        tidy.setShowWarnings(false);
        org.w3c.dom.Document root =
                tidy.parseDOM(new FileInputStream(file), null);
        rawDoc = root.getDocumentElement();
    }


    /**
 
View Full Code Here

     */
    public HtmlDocument(InputStream is) {
        Tidy tidy = new Tidy();
        tidy.setQuiet(true);
        tidy.setShowWarnings(false);
        org.w3c.dom.Document root = tidy.parseDOM(is, null);
        rawDoc = root.getDocumentElement();
    }


    /**
 
View Full Code Here

            StringWriter stringWriter = new StringWriter();
            PrintWriter errorWriter = new PrintWriter(stringWriter);
            tidy.setErrout(errorWriter);

            // Extract the document using JTidy and stream it.
            org.w3c.dom.Document doc = tidy.parseDOM(new BufferedInputStream(this.inputSource.getInputStream()), null);

            // FIXME: Jtidy doesn't warn or strip duplicate attributes in same
            // tag; stripping.
            XMLUtils.stripDuplicateAttributes(doc, null);
View Full Code Here

            StringWriter stringWriter = new StringWriter();
            //FIXME ??
            PrintWriter errorWriter = new PrintWriter(stringWriter);
            tidy.setErrout(errorWriter);
            // Extract the document using JTidy and stream it.
            Document doc = tidy.parseDOM(new BufferedInputStream(stream), null);
            errorWriter.flush();
            errorWriter.close();
            return doc;
        } catch (Exception ex) {
            throw new SAXException(ex);
View Full Code Here

            // Extract the document using JTidy and stream it.
            ByteArrayInputStream bais =
                new ByteArrayInputStream(text.getBytes());
            org.w3c.dom.Document doc =
                tidy.parseDOM(new BufferedInputStream(bais), null);

            // FIXME: Jtidy doesn't warn or strip duplicate attributes in same
            // tag; stripping.
            XMLUtils.stripDuplicateAttributes(doc, null);
View Full Code Here

            // Extract the document using JTidy and stream it.

            if (inputSource != null)
                requestStream = this.inputSource.getInputStream();

            org.w3c.dom.Document doc = tidy.parseDOM(new BufferedInputStream(requestStream), null);

            // FIXME: Jtidy doesn't warn or strip duplicate attributes in same
            // tag; stripping.
            XMLUtils.stripDuplicateAttributes(doc, null);
View Full Code Here

    public HtmlDocument(File file) throws IOException {
        Tidy tidy = new Tidy();
        tidy.setQuiet(true);
        tidy.setShowWarnings(false);

        org.w3c.dom.Document root = tidy.parseDOM(new FileInputStream(file), null);
        rawDoc = root.getDocumentElement();
    }

    /**
     * Constructs an <code>HtmlDocument</code> from an {@link java.io.InputStream}.
View Full Code Here

    public HtmlDocument(InputStream is) throws IOException {
        Tidy tidy = new Tidy();
        tidy.setQuiet(true);
        tidy.setShowWarnings(false);

        org.w3c.dom.Document root = tidy.parseDOM(is, null);
        rawDoc = root.getDocumentElement();
    }

    /**
     * Creates a Lucene <code>Document</code> from an {@link java.io.InputStream}.
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.