Package org.htmlparser

Examples of org.htmlparser.Parser.extractAllNodesThatMatch()


    Parser parser;
    NodeList list;
    List<String> versionsList = new ArrayList<String>();
    try {
      parser = new Parser (repositoryUrl);
      list = parser.extractAllNodesThatMatch(new NodeClassFilter(LinkTag.class));
    } catch (ParserException e) {
      System.err.println("Unable to read repository " + repositoryUrl);
      return null;
    }
   
View Full Code Here


    Parser parser;
    NodeList list;
    List<String> versionsList = new ArrayList<String>();
    try {
      parser = new Parser (url);
      list = parser.extractAllNodesThatMatch(new NodeClassFilter(LinkTag.class));
    } catch (ParserException e) {
      System.err.println("Unable to read repository " + url);
      return null;
    }
   
View Full Code Here

    if (baseUrl == null)
      return null;

    final HashSet<String> set = new HashSet<>();
    Parser parser = new Parser(html);
    NodeList matches = parser.extractAllNodesThatMatch(LINK_FILTER);
    SimpleNodeIterator it = matches.elements();
    while (it.hasMoreNodes()) {
      LinkTag node = (LinkTag) it.nextNode();
      String link = node.getLink().trim();
      // remove the anchor if present
View Full Code Here

   * @return never null, just an empty string if not parsable.
   */
  public static String extractTitle(String html) throws ParserException {
    String title = "";
    Parser parser = new Parser(html);
    NodeList matches = parser.extractAllNodesThatMatch(TITLE_FILTER);
    SimpleNodeIterator it = matches.elements();
    while (it.hasMoreNodes()) {
      TitleTag node = (TitleTag) it.nextNode();
      title = node.getTitle().trim();
    }
View Full Code Here

        final Parser htmlParser = initParser(html);
        final AndFilter filter = new AndFilter(new TagNameFilter(TABLE_TAG_NAME),
                new HasAttributeFilter(CLASS_ATTR_NAME, "details"));

        NodeList reportNode = htmlParser.extractAllNodesThatMatch(filter);
        if (reportNode != null && reportNode.size() > 0) {
            source = ((TableTag) reportNode.elements().nextNode()).toHtml(true);
        }

        return source;
View Full Code Here

        if (content == null || content.length == 0) {
            return null;
        }
        Parser parser = Parser.createParser(new String(content), null);
        try {
            NodeList nodelist = parser.extractAllNodesThatMatch(new NodeFilter() {
    public boolean accept(Node node) {
                    return true;
                }
            });
            return nodelist;
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and owned by ORACLE Inc. Contact coftware#gmail.com.