Package org.vietspider.html.util

Examples of org.vietspider.html.util.HTMLText


  protected List<HTMLNode> searchNodes(HTMLNode node, Name name) {
    List<HTMLNode> refsNode = new ArrayList<HTMLNode>();
    searchNodes(node.iterator(), refsNode, name);
    //    System.out.println("refsnode Size " + refsNode.size());

    HTMLText htmlText = new HTMLText();
    HTMLText.EmptyVerify verify = new HTMLText.EmptyVerify();

    List<HTMLNode> contents = new ArrayList<HTMLNode>();
    htmlText.searchText(contents, node, verify);
    return contents;
  }
View Full Code Here


  public List<HTMLNode> searchNodes(HTMLNode node, Name name) {
    List<HTMLNode> refsNode = new ArrayList<HTMLNode>();
    searchNodes(node.iterator(), refsNode, name);

    HTMLText htmlText = new HTMLText();
    HTMLText.EmptyVerify verify = new HTMLText.EmptyVerify();

    List<HTMLNode> contents = new ArrayList<HTMLNode>();
    htmlText.searchText(contents, node, verify);

    return contents;
  }
View Full Code Here

public class ImageDescRemover extends NodeRemover {
 
  public List<HTMLNode> removeDesc(HTMLNode root) {
    List<HTMLNode> values = new ArrayList<HTMLNode>();
   
    HTMLText textUtils = new HTMLText();
   
    List<HTMLNode> images = nodeUtil.search(root, Name.IMG);
    for(int i = 0; i < images.size(); i++) {
      HTMLNode image = images.get(i);
      HTMLNode parent  = searchUpper(image, Name.TABLE);
      if(parent != null) {
        textUtils.searchText(values, handleTable(parent, image));
//        addValues(handleTable(parent, image), values);
        continue;       
      }
     
      parent  = searchUpper(image, Name.DIV, Name.CENTER);
      if(parent != null) {
        if(isValidText(parent, 3)) {
          textUtils.searchText(values, parent);
//          addValues(parent, values);
        }
      }
    }
   
View Full Code Here

    HTMLNode maxNodeContent = null;

    List<HTMLNode> contents = new ArrayList<HTMLNode>();
    CharacterUtil characterUtil = new CharacterUtil();
   
    HTMLText htmlText = new HTMLText();
   
    short selectType = PathConfirmDialog.YES;
    boolean traverse = false;
    for(HTMLNode ele : commons) {
      contents.clear();
      htmlText.searchText(contents, ele);
      int count = countText(characterUtil, contents);

      if(count > maxCountContent) {
        maxCountContent = count;
        maxNodeContent = ele;
View Full Code Here

TOP

Related Classes of org.vietspider.html.util.HTMLText

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.