Package org.w3c.tidy

Examples of org.w3c.tidy.Tidy


        }
        return false;
    }
   
    public InputStream tidy(InputStream is) {
        Tidy tidy = new Tidy();
        tidy.setXHTML(false);
        tidy.setPrintBodyOnly(true);
        tidy.setDocType("loose");
        tidy.setXHTML(true);
        tidy.setForceOutput(true);
        java.io.ByteArrayOutputStream out = new java.io.ByteArrayOutputStream(1024);
        tidy.parse(is,out);
        return new ByteArrayInputStream(out.toByteArray());
    }
View Full Code Here


        // there is also a javax.swing.text.Document class.
        org.w3c.dom.Document document = null;

        StringWriter sw = new StringWriter();
        Tidy tidy = XPathUtil.makeTidyParser(true, true, true, sw);
        document = tidy.parseDOM(baIS, null);
        document.normalize();
        if (tidy.getParseErrors() > 0) {
            showErrorMessageDialog(sw.toString(),
                    "Tidy: " + tidy.getParseErrors() + " errors, " + tidy.getParseWarnings() + " warnings",
                    JOptionPane.WARNING_MESSAGE);
        }

        JPanel domTreePanel = new DOMTreePanel(document);
        resultsScrollPane.setViewportView(domTreePanel);
View Full Code Here

        encoding = encoding.substring(0,i).trim();
      if (encoding.indexOf("\"")!=-1)
        encoding = encoding.substring(1,encoding.length()+1);
    }

    Tidy tidy = new Tidy ();
    tidy.setXHTML (true);
    tidy.setDocType ("omit");
    tidy.setQuiet(true);
    tidy.setShowWarnings(false);
    tidy.setNumEntities(true);
    tidy.setWord2000(true);

    // If charset is specified in header, set JTidy's
    // character encoding  to either UTF-8, ISO-8859-1
    // or ISO-2022 accordingly (NOTE that these are
    // the only character encoding sets that are supported in
    // JTidy).  If character encoding is not specified,
    // UTF-8 is the default.
    if (encoding != null)
    {
      if (encoding.toLowerCase().equals("iso-8859-1"))
        tidy.setCharEncoding(org.w3c.tidy.Configuration.LATIN1);
      else if (encoding.toLowerCase().equals("iso-2022-jp"))
        tidy.setCharEncoding(org.w3c.tidy.Configuration.ISO2022);
      else
        tidy.setCharEncoding(org.w3c.tidy.Configuration.UTF8);
    }
    else
    {
      tidy.setCharEncoding(org.w3c.tidy.Configuration.UTF8);
    }

    tidy.setErrout(devNull);

    ByteArrayOutputStream stream = new ByteArrayOutputStream (1024);
    BufferedOutputStream out = new BufferedOutputStream (stream);

    tidy.parse (urlConnect.getInputStream(), out);
    String tidiedXml = stream.toString();
    stream.close();
    out.close();

    if ( tidy.getParseErrors() > 0 )
      throw new GeneralRenderingException("Unable to convert input document to XHTML");

    return tidiedXml;
  }
View Full Code Here

    protected Document getHtmlDocument(Resource resource) {
        String contentType = resource.getContentType();

        if (contentType != null && contentType.startsWith("text/html")) {
            try {
                Tidy tidy = new Tidy();
                tidy.setQuiet(true);
                tidy.setXmlOut(true);
                tidy.setErrout(new PrintWriter(new ByteArrayOutputStream()));

                org.w3c.dom.Document dom = tidy.parseDOM(resource.getInputStream(), null);

                return new DOMReader().read(dom);
            } catch (Exception e) {
            }
        }
View Full Code Here

        }

        String commentCleaned = decodeJavadocTags( description );

        // Using jTidy to clean comment
        Tidy tidy = new Tidy();
        tidy.setDocType( "loose" );
        tidy.setXHTML( true );
        tidy.setXmlOut( true );
        tidy.setInputEncoding( "UTF-8" );
        tidy.setOutputEncoding( "UTF-8" );
        tidy.setMakeClean( true );
        tidy.setNumEntities( true );
        tidy.setQuoteNbsp( false );
        tidy.setQuiet( true );
        tidy.setShowWarnings( false );
        try
        {
            ByteArrayOutputStream out = new ByteArrayOutputStream( commentCleaned.length() + 256 );
            tidy.parse( new ByteArrayInputStream( commentCleaned.getBytes( "UTF-8" ) ), out );
            commentCleaned = out.toString( "UTF-8" );
        }
        catch ( UnsupportedEncodingException e )
        {
            // cannot happen as every JVM must support UTF-8, see also class javadoc for java.nio.charset.Charset
View Full Code Here

        // there is also a javax.swing.text.Document class.
        org.w3c.dom.Document document = null;

        StringWriter sw = new StringWriter();
        Tidy tidy = XPathUtil.makeTidyParser(true, true, true, sw);
        document = tidy.parseDOM(baIS, null);
        document.normalize();
        if (tidy.getParseErrors() > 0) {
            showErrorMessageDialog(sw.toString(),
                    "Tidy: " + tidy.getParseErrors() + " errors, " + tidy.getParseWarnings() + " warnings",
                    JOptionPane.WARNING_MESSAGE);
        }

        JPanel domTreePanel = new DOMTreePanel(document);
        view = domTreePanel;
View Full Code Here

     * @throws TidyException if a ParseError is detected and report_errors is true
     */
    private static Document tidyDoc(InputStream stream, boolean quiet, boolean showWarnings, boolean report_errors,
            boolean isXML) throws TidyException {
        StringWriter sw = new StringWriter();
        Tidy tidy = makeTidyParser(quiet, showWarnings, isXML, sw);
        Document doc = tidy.parseDOM(stream, null);
        doc.normalize();
        if (tidy.getParseErrors() > 0) {
            if (report_errors) {
                log.error("TidyException: " + sw.toString());
                throw new TidyException(tidy.getParseErrors(),tidy.getParseWarnings());
            }
            log.warn("Tidy errors: " + sw.toString());
        }
        return doc;
    }
View Full Code Here

     * @param isXml - treat the content as XML?
     * @param stringWriter - if non-null, use this for Tidy errorOutput
     * @return the Tidy parser
     */
    public static Tidy makeTidyParser(boolean quiet, boolean showWarnings, boolean isXml, StringWriter stringWriter) {
        Tidy tidy = new Tidy();
        tidy.setCharEncoding(org.w3c.tidy.Configuration.UTF8);
        tidy.setQuiet(quiet);
        tidy.setShowWarnings(showWarnings);
        tidy.setMakeClean(true);
        tidy.setXmlTags(isXml);
        if (stringWriter != null) {
            tidy.setErrout(new PrintWriter(stringWriter));
        }
        return tidy;
    }
View Full Code Here

        }

        result.setFailure(false);

        // create parser
        Tidy tidy = null;
        try {
            log.debug("HTMLAssertions.getResult(): Setup tidy ...");
            log.debug("doctype: " + getDoctype());
            log.debug("errors only: " + isErrorsOnly());
            log.debug("error threshold: " + getErrorThreshold());
            log.debug("warning threshold: " + getWarningThreshold());
            log.debug("html mode: " + isHTML());
            log.debug("xhtml mode: " + isXHTML());
            log.debug("xml mode: " + isXML());
            tidy = new Tidy();
            tidy.setCharEncoding(org.w3c.tidy.Configuration.UTF8);
            tidy.setQuiet(false);
            tidy.setShowWarnings(true);
            tidy.setOnlyErrors(isErrorsOnly());
            tidy.setDocType(getDoctype());
            if (isXHTML()) {
                tidy.setXHTML(true);
            } else if (isXML()) {
                tidy.setXmlTags(true);
            }
            log.debug("err file: " + getFilename());
            tidy.setErrfile(getFilename());

            if (log.isDebugEnabled()) {
                log.debug("getParser : tidy parser created - " + tidy);
            }
            log.debug("HTMLAssertions.getResult(): Tidy instance created!");

        } catch (Exception e) {//TODO replace with proper Exception
            log.error("Unable to instantiate tidy parser", e);
            result.setFailure(true);
            result.setFailureMessage("Unable to instantiate tidy parser");
            // return with an error
            return result;
        }

        /*
         * Run tidy.
         */
        try {
            log.debug("HTMLAssertions.getResult(): start parsing with tidy ...");

            StringWriter errbuf = new StringWriter();
            tidy.setErrout(new PrintWriter(errbuf));
            // Node node = tidy.parseDOM(new
            // ByteArrayInputStream(response.getResponseData()), null);
            ByteArrayOutputStream os = new ByteArrayOutputStream();
            log.debug("Start : parse");
            Node node = tidy.parse(new ByteArrayInputStream(inResponse.getResponseData()), os);
            if (log.isDebugEnabled()) {
                log.debug("node : " + node);
            }
            log.debug("End   : parse");
            log.debug("HTMLAssertions.getResult(): parsing with tidy done!");
            log.debug("Output: " + os.toString());

            // write output to file
            writeOutput(errbuf.toString());

            // evaluate result
            if ((tidy.getParseErrors() > getErrorThreshold())
                    || (!isErrorsOnly() && (tidy.getParseWarnings() > getWarningThreshold()))) {
                log.debug("HTMLAssertions.getResult(): errors/warnings detected:");
                log.debug(errbuf.toString());
                result.setFailure(true);
                result.setFailureMessage(MessageFormat.format("Tidy Parser errors:   " + tidy.getParseErrors()
                        + " (allowed " + getErrorThreshold() + ") " + "Tidy Parser warnings: "
                        + tidy.getParseWarnings() + " (allowed " + getWarningThreshold() + ")", new Object[0]));
                // return with an error

            } else if ((tidy.getParseErrors() > 0) || (tidy.getParseWarnings() > 0)) {
                // return with no error
                log.debug("HTMLAssertions.getResult(): there were errors/warnings but threshold to high");
                result.setFailure(false);
            } else {
                // return with no error
View Full Code Here

     *
     * @return a <code>tidy</code> HTML parser
     */
    public static Tidy getParser() {
        log.debug("Start : getParser1");
        Tidy tidy = new Tidy();
        tidy.setCharEncoding(org.w3c.tidy.Configuration.UTF8);
        tidy.setQuiet(true);
        tidy.setShowWarnings(false);

        if (log.isDebugEnabled()) {
            log.debug("getParser1 : tidy parser created - " + tidy);
        }

View Full Code Here

TOP

Related Classes of org.w3c.tidy.Tidy

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.