Package org.w3c.tidy

Examples of org.w3c.tidy.Tidy


    // there is also a javax.swing.text.Document class.
    org.w3c.dom.Document document = null;

        StringWriter sw = new StringWriter();
        Tidy tidy = XPathUtil.makeTidyParser(true, true, true, sw);
        document = tidy.parseDOM(baIS, null);
        document.normalize();
        if (tidy.getParseErrors() > 0) {
            showErrorMessageDialog(sw.toString(),
                    "Tidy: " + tidy.getParseErrors() + " errors, " + tidy.getParseWarnings() + " warnings",
                    JOptionPane.WARNING_MESSAGE);
        }

        JPanel domTreePanel = new DOMTreePanel(document);   
        view = domTreePanel;
View Full Code Here


   *
   * @return a <code>tidy</code> HTML parser
   */
  private static Tidy getTidyParser() {
    log.debug("Start : getParser");
    Tidy tidy = new Tidy();
    tidy.setCharEncoding(org.w3c.tidy.Configuration.UTF8);
    tidy.setQuiet(true);
    tidy.setShowWarnings(false);
    if (log.isDebugEnabled()) {
      log.debug("getParser : tidy parser created - " + tidy);
    }
    log.debug("End   : getParser");
    return tidy;
View Full Code Here

   * @throws TidyException if a ParseError is detected and report_errors is true
   */
  private static Document tidyDoc(InputStream stream, boolean quiet, boolean showWarnings, boolean report_errors,
          boolean isXML) throws TidyException {
        StringWriter sw = new StringWriter();
    Tidy tidy = makeTidyParser(quiet, showWarnings, isXML, sw);
    Document doc = tidy.parseDOM(stream, null);
    doc.normalize();
    if (tidy.getParseErrors() > 0) {
      if (report_errors) {
              log.error("TidyException: " + sw.toString());   
          throw new TidyException(tidy.getParseErrors(),tidy.getParseWarnings());
      }
        log.warn("Tidy errors: " + sw.toString());
    }
    return doc;
  }
View Full Code Here

   * @param isXml - treat the content as XML?
   * @param stringWriter - if non-null, use this for Tidy errorOutput
   * @return the Tidy parser
   */
    public static Tidy makeTidyParser(boolean quiet, boolean showWarnings, boolean isXml, StringWriter stringWriter) {
        Tidy tidy = new Tidy();
        tidy.setCharEncoding(org.w3c.tidy.Configuration.UTF8);
        tidy.setQuiet(quiet);
        tidy.setShowWarnings(showWarnings);
        tidy.setMakeClean(true);
        tidy.setXmlTags(isXml);
        if (stringWriter != null) {
            tidy.setErrout(new PrintWriter(stringWriter));
        }
        return tidy;
    }
View Full Code Here

     * @throws TidyException if a ParseError is detected and report_errors is true
     */
    private static Document tidyDoc(InputStream stream, boolean quiet, boolean showWarnings, boolean report_errors,
            boolean isXML, OutputStream out) throws TidyException {
        StringWriter sw = new StringWriter();
        Tidy tidy = makeTidyParser(quiet, showWarnings, isXML, sw);
        Document doc = tidy.parseDOM(stream, out);
        doc.normalize();
        if (tidy.getParseErrors() > 0) {
            if (report_errors) {
                log.error("TidyException: " + sw.toString());
                throw new TidyException(tidy.getParseErrors(),tidy.getParseWarnings());
            }
            log.warn("Tidy errors: " + sw.toString());
        }
        return doc;
    }
View Full Code Here

     * @param isXml - treat the content as XML?
     * @param stringWriter - if non-null, use this for Tidy errorOutput
     * @return the Tidy parser
     */
    public static Tidy makeTidyParser(boolean quiet, boolean showWarnings, boolean isXml, StringWriter stringWriter) {
        Tidy tidy = new Tidy();
        tidy.setInputEncoding("UTF8");
        tidy.setOutputEncoding("UTF8");
        tidy.setQuiet(quiet);
        tidy.setShowWarnings(showWarnings);
        tidy.setMakeClean(true);
        tidy.setXmlTags(isXml);
        if (stringWriter != null) {
            tidy.setErrout(new PrintWriter(stringWriter));
        }
        return tidy;
    }
View Full Code Here

        encoding = encoding.substring(0,i).trim();
      if (encoding.indexOf("\"")!=-1)
        encoding = encoding.substring(1,encoding.length()+1);
    }

    Tidy tidy = (Tidy) perThreadTidy.get();

    // If charset is specified in header, set JTidy's
    // character encoding  to either UTF-8, ISO-8859-1
    // or ISO-2022 accordingly (NOTE that these are
    // the only character encoding sets that are supported in
    // JTidy).  If character encoding is not specified,
    // UTF-8 is the default.
    if (encoding != null)
    {
      if (encoding.toLowerCase().equals("iso-8859-1"))
        tidy.setCharEncoding(org.w3c.tidy.Configuration.LATIN1);
      else if (encoding.toLowerCase().equals("iso-2022-jp"))
        tidy.setCharEncoding(org.w3c.tidy.Configuration.ISO2022);
      else
        tidy.setCharEncoding(org.w3c.tidy.Configuration.UTF8);
    }
    else
    {
      tidy.setCharEncoding(org.w3c.tidy.Configuration.UTF8);
    }

//    tidy.setErrout(devNull);

    ByteArrayOutputStream stream = new ByteArrayOutputStream (1024);
    BufferedOutputStream out = new BufferedOutputStream (stream);

    tidy.parse (urlConnect.getInputStream(), out);
    tidiedXml = stream.toString();
    stream.close();
    out.close();

    if ( tidy.getParseErrors() > 0 )
      throw new GeneralRenderingException("Unable to convert input document to XHTML");
    }finally{
        long elapsedTimeMillis = System.currentTimeMillis()-start;
        logAccess(uri, state, status, elapsedTimeMillis);
    }
View Full Code Here

      // Validates using JTidy
      if(validateUsingTidy()){
        ByteArrayOutputStream out = null;
        try {
          out = new ByteArrayOutputStream();
          Tidy tidy = new Tidy();
          tidy.setXHTML(false);
          tidy.setCharEncoding(org.w3c.tidy.Configuration.RAW);
          tidy.setErrout(new PrintWriter(out, true));
          tidy.parse(file.getContents(), null);
         
          String errors = new String(out.toByteArray());
         
          errors = errors.replaceAll("\r\n","\n");
          errors = errors.replaceAll("\r"  ,"\n");
View Full Code Here

    }
   
    preM.appendTail(sb);
   
    // Now we can safely use jTidy
    Tidy tidy = new Tidy();
    tidy.setConfigurationFromFile(webSite.getFile
        (webSite.getAdminPath().add("tidy.config")).getAbsolutePath());
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
   
    try {
      tidy.parse(new ByteArrayInputStream(sb.toString().getBytes("utf-8")), baos);
      return WebUtils.convertToHTMLEntities(baos.toString("utf-8"),
          Utils.SYSTEM_CHARSET, false);
    } catch (Exception ex) {
      webSite.log("Error while tidying HTML code", ex);
    }
View Full Code Here

      // fall through, this wasn't xml
    }

    try
    {
      Tidy tidy = new Tidy();
      tidy.setXmlOut( true );
      tidy.setShowWarnings( false );
      tidy.setErrout( new PrintWriter( new StringWriter() ) );
      // tidy.setQuiet(true);
      tidy.setNumEntities( true );
      tidy.setQuoteNbsp( true );
      tidy.setFixUri( false );

      Document document = tidy.parseDOM( new ByteArrayInputStream( content.getBytes() ), null );
      StringWriter writer = new StringWriter();
      XmlUtils.serializePretty( document, writer );
      return writer.toString();
    }
    catch( Throwable e )
View Full Code Here

TOP

Related Classes of org.w3c.tidy.Tidy

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.