Package org.htmlcleaner

Examples of org.htmlcleaner.HtmlCleaner.clean()


  /**
   * Converts HTML markup to plain text by parsing it with an HtmlCleaner
   * built from CLEANER_PROPERTIES and returning the text of the root node.
   *
   * @param htmlText the HTML markup to convert
   * @return on successful parsing, the text content of the cleaned root node
   */
  public static String convertHtml2PlainText( String htmlText ) {

    HtmlCleaner cleaner = new HtmlCleaner( CLEANER_PROPERTIES );

    try {
      // Parse the markup into a tag tree and take the text of the whole tree.
      TagNode rootNode = cleaner.clean( new StringReader( htmlText ) );
      return rootNode.getText().toString();
    }
    catch (IOException e) {
      // The failure is only logged here; what the method returns afterwards
      // lies in the part of the method that is not shown in this excerpt.
      log.error( e.getMessage(), e );
    }
View Full Code Here


    // Cleaner configured from CLEANER_PROPERTIES.
    HtmlCleaner cleaner = new HtmlCleaner( CLEANER_PROPERTIES );
    String result = "";

    try {
      // Parse the message markup into a tag tree.
      TagNode rootNode = cleaner.clean( new StringReader( messageText ) );

      // Look up all <a> elements (second argument requests a recursive search).
      TagNode[] nodes = rootNode.getElementsByName( "a", true );
      if ( nodes != null && nodes.length > 0 ) {
        for (TagNode tagnode : nodes) {
          // Drop the "target" attribute from each link.
          tagnode.removeAttribute( "target" );
View Full Code Here

    // Cleaner configured from CLEANER_PROPERTIES.
    HtmlCleaner cleaner = new HtmlCleaner( CLEANER_PROPERTIES );
    String result = "";

    try {
      // Parse the message markup into a tag tree.
      TagNode rootNode = cleaner.clean( new StringReader( messageText ) );

      // Look up all <a> elements (second argument requests a recursive search).
      TagNode[] nodes = rootNode.getElementsByName( "a", true );
      if ( nodes != null && nodes.length > 0 ) {
        for (TagNode tagnode : nodes) {
          // Drop the "target" attribute from each link.
          tagnode.removeAttribute( "target" );
View Full Code Here

  /**
   * Converts HTML markup to plain text by parsing it with an HtmlCleaner
   * built from CLEANER_PROPERTIES and returning the text of the root node.
   *
   * @param htmlText the HTML markup to convert
   * @return on successful parsing, the text content of the cleaned root node
   */
  public static String convertHtml2PlainText( String htmlText ) {

    HtmlCleaner cleaner = new HtmlCleaner( CLEANER_PROPERTIES );

    try {
      // Parse the markup into a tag tree and take the text of the whole tree.
      TagNode rootNode = cleaner.clean( new StringReader( htmlText ) );
      return rootNode.getText().toString();
    }
    catch (IOException e) {
      // The failure is only logged here; what the method returns afterwards
      // lies in the part of the method that is not shown in this excerpt.
      log.error( e.getMessage(), e );
    }
View Full Code Here

      //
      // Check for charset overrides in the HTML start page
      //
      HtmlCleaner cleaner = new HtmlCleaner();
      TagNode httpEquivNode = cleaner.clean(get.getResponseBodyAsStream()).findElementByAttValue("http-equiv", "content-type", true, false);
      if (httpEquivNode != null && httpEquivNode.hasAttribute("content")){
        String value = httpEquivNode.getAttributeByName("content");
        int offset = value.indexOf("charset=");
        if (offset >= -1){
            charset = value.substring(offset+8).toUpperCase();
View Full Code Here

    {
        // Cleaner properties with namespace awareness switched off.
        CleanerProperties props = new CleanerProperties();
        props.setNamespacesAware(false);
       
        // Clean the markup obtained from document(tester), then hand the
        // resulting tag tree to DomSerializer.createDOM and return its result.
        HtmlCleaner cleaner = new HtmlCleaner(props);
        return new DomSerializer(props, true).createDOM(cleaner.clean(document(tester)));
    }
   
    /**
     * Asserts that an XPath expression can be found in the most recently
     * rendered Wicket page. Uses
View Full Code Here

    /**
     * Cleans the given HTML with a default-configured HtmlCleaner and
     * serializes the resulting tag tree back to a string.
     *
     * @param dirtyHtml the markup to clean
     * @return on success, the markup as serialized by SimpleHtmlSerializer
     */
    private String clean(String dirtyHtml) {
        try {
            HtmlCleaner cleaner = new HtmlCleaner();

            TagNode root = cleaner.clean(dirtyHtml);

            // Serialize with the same properties object the cleaner itself uses.
            return new SimpleHtmlSerializer(cleaner.getProperties()).getAsString(root);
        } catch (IOException e) {
            // The failure is only logged here; the value returned afterwards is
            // in the part of the method that is not shown in this excerpt.
            logger.error(e.getMessage(), e);
        }
View Full Code Here

        // Undo HTML entity escaping in the source before parsing it.
        String rawData = StringEscapeUtils.unescapeHtml4(source);
           
        HtmlCleaner cleaner = new HtmlCleaner();
        //CleanerProperties props = cleaner.getProperties();        
        //props.setXXX(...);
        // Parse the unescaped markup into a tag tree.
        TagNode node = cleaner.clean(rawData);
        TagNode[] myNodes;
       
        // Iterate over all <a> elements (second argument requests a recursive search).
        myNodes = node.getElementsByName("a", true);
        for (int i=0;i<myNodes.length;i++)
        {
View Full Code Here

    // String-to-String map; how it is filled is not visible in this excerpt.
    final HashMap<String, String> m = new HashMap<String, String>();

    HtmlCleaner cleaner = new HtmlCleaner();
    //CleanerProperties props = cleaner.getProperties();    
    //props.setXXX(...);
    // Parse the raw page into a tag tree.
    TagNode node = cleaner.clean(rawPage);
    TagNode[] myNodes;

    // <meta name="..." content="..." />
    // <meta http-equiv="refresh" content=
    // Look up all <meta> elements (second argument requests a recursive search).
    myNodes = node.getElementsByName("meta", true);
View Full Code Here

    if (rawPage==null || !StringUtils.containsIgnoreCase(rawPage, "<base")) return null;

    HtmlCleaner cleaner = new HtmlCleaner();
    //CleanerProperties props = cleaner.getProperties();    
    //props.setXXX(...);
    TagNode node = cleaner.clean(rawPage);
    TagNode[] myNodes = node.getElementsByName("base", true);
    if (myNodes==null || myNodes.length==0) return null;
    String href = myNodes[0].getAttributeByName("href");
    if (href!=null) return href;
    return null;
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.