Package org.vietspider.common.text

Examples of org.vietspider.common.text.TextCounter


   
    if(links.size() < 1) {
      return null;
    }

    TextCounter counter = new TextCounter();
    if(counter.countSentence(builder) > 3) return null;
    if(counter.countWords(builder) > 30) return null;
   
    return nodes;
  }
View Full Code Here


    String text = new String(links.get(0).getValue());
    text = text.trim();
    char ch = text.charAt(0);
    if(Character.isLetter(ch) && Character.isLowerCase(ch)) return false;
   
    TextCounter counter = new TextCounter();
    if(counter.countSentence(text) > 1) return false;
   
    return true;
  }
View Full Code Here

    return searchUpper(parent, names);
  }
 
  public boolean isValidText(HTMLNode node, int size) {
    TextRenderer textRenderer = new TextRenderer(node, null);
    TextCounter counter = new TextCounter();
//    System.out.println(textRenderer.getTextValue());
//    System.out.println(counter.count(textRenderer.getTextValue()));
    if(counter.countSentence(textRenderer.getTextValue()) > size) return false;
    return true;
  }
View Full Code Here

  public int getScore() {  return score; }
 
  private void analytics() {
    if(maxPattern > -1) return;
   
    TextCounter textCounter = new TextCounter();
    int s = start;
    int index = start;
    String text = renderer.getTextValue();
    int pattern = 0;
    maxPattern = 0;
    minPattern = -1;
   
    while(index < end) {
      char c = text.charAt(index);
      if(c == '\\') {
        //tinh toan diem cho phan truoc
        int sentence = textCounter.countSentence(text, s, index);
        int word = textCounter.countWord(text, s, index);
//        if(sentence > 5) {
//          System.out.println("========================================");
//          System.out.println(text.substring(s, index));
//          System.out.println(" ===== > "+ sentence+  " : "
//              + word + " : "+ calculate(pattern, sentence, word));
//          System.out.println("========================================");
//        }
        score += new ScoreCalculator().calculate(pattern, sentence, word);
        //end ket thuc
        pattern = 0;
        while(c == '\\') {
          pattern++;
          index++;
          if(index >= end) break;
          c = text.charAt(index);
        }
        s = index;
        if(pattern > maxPattern) maxPattern = pattern;
        if(pattern < minPattern || minPattern < 0) minPattern =  pattern;
        timePattern++;
        totalSeparator += pattern;
      }
      index++;
    }
   
    if(s < index) {
      int sentence = textCounter.countSentence(text, s, index);
      int word = textCounter.countWord(text, s, index);
      score += new ScoreCalculator().calculate(pattern, sentence, word);
    }
   
//    System.out.println("================================================================");
    for(int i = 0;  i < nodes.size(); i++) {
View Full Code Here

 
  public int getScore() { return score; }
 
  private void processTextValue() {
    String [] elements = text.split("\n");
    TextCounter textCounter = new TextCounter();
    for(int i = 0; i < elements.length; i++) {
      int value  = textCounter.countSentence(elements[i]);
      if(value > 1) {
//        System.out.println(" : == =======================================");
//        System.out.println("papa a "+ elements[i]);
        paragraph++;
      }
      sentence += value;
     
      word += textCounter.countWords(elements[i]);
    }
  }
View Full Code Here

    return true;
  }

  private void processTextValue() {
    String [] elements = text.split("\n");
    TextCounter textCounter = new TextCounter();
    for(int i = 0; i < elements.length; i++) {
      int value  = textCounter.countSentence(elements[i]);
      if(value > 1) {
//        System.out.println(" : == =======================================");
//        System.out.println("papa a "+ elements[i]);
        paragraph++;
      }
      sentence += value;
     
      word += textCounter.countWords(elements[i]);
    }
  }
View Full Code Here

    }
   
    List<AnalyticsModel> models = new ArrayList<AnalyticsModel>(nodes.length);
    int maxScore = 0;
   
    TextCounter textCounter = new TextCounter();
    ScoreCalculator scoreCalculator = new ScoreCalculator();
    for(int i = 0; i < nodes.length; i++) {
      if(isTextElement(nodes[i])) return;
      else if(isBlockElement(nodes[i])) {
        AnalyticsRenderer renderer = new AnalyticsRenderer(nodes[i], true);
        String text = renderer.getTextValue().toString();
        int sentence = textCounter.countSentence(text, 0, text.length());
        int word = textCounter.countWord(text, 0, text.length());

        int score = scoreCalculator.calculate(sentence, word);
        models.add(new AnalyticsModel(nodes[i], score));
        if(score > maxScore) maxScore = score;
      }
View Full Code Here

  public boolean hasParagraph(HTMLNode node, int max) {
    AnalyticsRenderer renderer = new AnalyticsRenderer(node, true);
    if(renderer.getParagraph() < 1) return false;

    String [] elements = renderer.getTextValue().toString().split("\n");
    TextCounter textCounter = contentChecker.getTextCounter();
    for(String element : elements) {
      int counter = textCounter.countSentence(element);
      if(counter > max) return true;

      counter = textCounter.countWord(element, 0, element.length());
      if(counter >= 15) return true;
    }
    return false;
  }
View Full Code Here

TOP

Related Classes of org.vietspider.common.text.TextCounter

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.