Package us.codecraft.learning.parser

Source Code of us.codecraft.learning.parser.PageErrorChecker

package us.codecraft.learning.parser;

import org.jsoup.Jsoup;
import org.jsoup.parser.ParseError;
import org.jsoup.parser.Parser;

import java.io.IOException;
import java.util.List;

/**
* @author code4crafter@gmail.com
*/
public class PageErrorChecker {

    public static List<ParseError> check(String url) throws IOException {
        Parser parser = Parser.htmlParser();
        parser.setTrackErrors(100);
        String body = Jsoup.connect(url).userAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36")
                .execute().body();
        parser.parseInput(body, url);
        List<ParseError> errors = parser.getErrors();
        return errors;
    }

    public static void main(String[] args) throws IOException {
        List<ParseError> check = check("http://www.dianping.com");
        System.out.println(check);
    }
}
TOP

Related Classes of us.codecraft.learning.parser.PageErrorChecker

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.