Package org.vietspider.html

Examples of org.vietspider.html.HTMLNode


      if(isSuper(tdNodes.get(tdIndex), imgNode)) break;
    }

    if(tdIndex >= trNodes.get(trIndex+1).totalOfChildren()) return null;
   
    HTMLNode nodeValue = trNodes.get(trIndex+1).getChild(tdIndex);
    return isValidText(nodeValue, 3) ? nodeValue : null;
   
//    return isValidText(trNodes.get(trIndex+1).getChildren().get(tdIndex), 3);
  }
View Full Code Here


    NodeConfig config = node.getConfig();
    if(config.children().length > 0 || config.children_types().length > 0){
      Iterator<HTMLNode> iter =  node.childIterator();//node.getChildren().iterator();
      while(iter.hasNext()){
        HTMLNode child = iter.next();       
        if(HTML.isChild(node, child.getConfig())) continue;       
        iter.remove();
        if(config.move() == MoveType.INSERT) insert(node, child);
//        if(config.move() == MoveType.ADD) node.getParent().addChild(child);
      }
    }   
View Full Code Here

    if(head == null) head = service.createHeader();     
    if(body == null) body = service.createBody();
   
    Iterator<HTMLNode> iter = children.iterator();
    while(iter.hasNext()){
      HTMLNode ele = iter.next();
      if(ele.isNode(Name.HEAD) || ele.isNode(Name.BODY)) continue;
      if(ele.isNode(Name.SCRIPT)){
        head.addInternalChild(ele);
//        ele.setParent(head);
      } else {
        body.addInternalChild(ele);
//        ele.setParent(body);
View Full Code Here

public class FormChecker {
 
  boolean hasForm(HTMLNode node) {
    NodeIterator iterator = node.iterator();
    while(iterator.hasNext()) {
      HTMLNode n = iterator.next();
      if(n.isNode(Name.FORM)) return true;
      if(isFormElement(n)) return true;
    }
    return false;
  }
View Full Code Here

public class FormNodeRemover {
 
  boolean hasForm(HTMLNode node) {
    NodeIterator iterator = node.iterator();
    while(iterator.hasNext()) {
      HTMLNode n = iterator.next();
      if(n.isNode(Name.FORM)) return true;
//      if(isFormElement(n)) return true;
    }
    return false;
  }
View Full Code Here

  }
 
  public boolean isTextBlock(HTMLNode node, boolean checkLink, int w_size, int s_size) {
    NodeIterator nodeIterator = node.iterator();
    while(nodeIterator.hasNext()) {
      HTMLNode iterNode = nodeIterator.next();
      if(checkLink) {
        if(RenderNodeUtils.getAncestor(iterNode, Name.A, 0, 5) != null) continue;
      }
      if(iterNode.isNode(Name.CONTENT)) {
        String text = iterNode.getTextValue();
        int word = txtCounter.countWords(text);
        if(word >= w_size) return true;
        int sentence = txtCounter.countSentence(text);
        if(sentence >= s_size) return true;
      }
View Full Code Here

      boolean isTextBlock = contentChecker.isTextBlock(node, true, 50, 5);
      model.setTextBlockStatus( isTextBlock ? CheckModel.RIGHT : CheckModel.NOT);
    }
    if(model.hasTextBlock()) return true;*/
   
    HTMLNode node = model.getNode();
    if(linkBlockChecker.isLink(model)) {
      toContainerAncestor(model);
      return false;
    }
    if(isLinkList(node)) {
View Full Code Here

    for(int i = 0; i < children.size(); i++) {
      if(!children.get(i).isNode(Name.LI)) continue;
      if(hasLink(children.get(i))) list.add(children.get(i));
    }
    if(list.size() < 3) return false;
    HTMLNode node = list.get(0);
   
    NodeComparator nodeComparator = new NodeComparator();
    int counter = 1;
    int onlyLinkCounter = onlyLink(node) ? 1 : 0;
    for(int i = 1; i < list.size(); i++) {
      HTMLNode n = list.get(i);
      if(onlyLink(n)) onlyLinkCounter++;
      if(!nodeComparator.compare(node, n)) continue;
      counter++;
    }
   
View Full Code Here

  }
 
  private boolean hasLink(HTMLNode node) {
    NodeIterator iterator = node.iterator();
    while(iterator.hasNext()) {
      HTMLNode n = iterator.next();
      if(n.isNode(Name.A)) return true;
    }
    return false;
  }
View Full Code Here

    if(children == null || children.size() != 1) return false;
    return children.get(0).isNode(Name.A);
  }
 
  private void toContainerAncestor(CheckModel model) {
    HTMLNode node = model.getNode();
    HTMLNode div = RenderNodeUtils.getAncestor(node, Name.DIV, 0, 3);
    if(div != null) {
      if(linkBlockChecker.hasParagraph(div, 1)) return;
      model.setRemoveNode(div);
      return;
    }
   
    HTMLNode tr = RenderNodeUtils.getAncestor(node, Name.TABLE, 0, 5);
    if(tr != null) {
      if(linkBlockChecker.hasParagraph(tr, 1)) return;
      model.setRemoveNode(tr);
      return;
    }
View Full Code Here

TOP

Related Classes of org.vietspider.html.HTMLNode

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.