Examples of outputSettings()


Examples of org.jsoup.nodes.Document.outputSettings()

        // tests <meta charset> when preceded by another <meta>
        File in = getFile("/htmltests/baidu-variant.html");
        Document doc = Jsoup.parse(in, null,
            "http://www.baidu.com/"); // http charset is gb2312, but NOT specifying it, to test http-equiv parse
        // check auto-detect from meta
        assertEquals("GB2312", doc.outputSettings().charset().displayName());
        assertEquals("<title>百度一下,你就知道</title>", doc.select("title").outerHtml());
    }

    @Test
    public void testHtml5Charset() throws IOException {
View Full Code Here

Examples of org.jsoup.nodes.Document.outputSettings()

    public void testHtml5Charset() throws IOException {
        // test that <meta charset="gb2312"> works
        File in = getFile("/htmltests/meta-charset-1.html");
        Document doc = Jsoup.parse(in, null, "http://example.com/"); //gb2312, has html5 <meta charset>
        assertEquals("新", doc.text());
        assertEquals("GB2312", doc.outputSettings().charset().displayName());

        // double check, no charset, falls back to utf8 which is incorrect
        in = getFile("/htmltests/meta-charset-2.html"); //
        doc = Jsoup.parse(in, null, "http://example.com"); // gb2312, no charset
        assertEquals("UTF-8", doc.outputSettings().charset().displayName());
View Full Code Here

Examples of org.jsoup.nodes.Document.outputSettings()

        assertEquals("GB2312", doc.outputSettings().charset().displayName());

        // double check, no charset, falls back to utf8 which is incorrect
        in = getFile("/htmltests/meta-charset-2.html"); //
        doc = Jsoup.parse(in, null, "http://example.com"); // gb2312, no charset
        assertEquals("UTF-8", doc.outputSettings().charset().displayName());
        assertFalse("新".equals(doc.text()));

        // confirm fallback to utf8
        in = getFile("/htmltests/meta-charset-3.html");
        doc = Jsoup.parse(in, null, "http://example.com/"); // utf8, no charset
View Full Code Here

Examples of org.jsoup.nodes.Document.outputSettings()

        assertFalse("新".equals(doc.text()));

        // confirm fallback to utf8
        in = getFile("/htmltests/meta-charset-3.html");
        doc = Jsoup.parse(in, null, "http://example.com/"); // utf8, no charset
        assertEquals("UTF-8", doc.outputSettings().charset().displayName());
        assertEquals("新", doc.text());
    }

    @Test
    public void testNytArticle() throws IOException {
View Full Code Here

Examples of org.jsoup.nodes.Document.outputSettings()

    @Test
    public void fetchBaidu() throws IOException {
        Connection.Response res = Jsoup.connect("http://www.baidu.com/").timeout(10*1000).execute();
        Document doc = res.parse();

        assertEquals("GBK", doc.outputSettings().charset().displayName());
        assertEquals("GBK", res.charset());
        assert(res.hasCookie("BAIDUID"));
        assertEquals("text/html;charset=gbk", res.contentType());
    }
   
View Full Code Here

Examples of org.jsoup.nodes.Document.outputSettings()

            // into head mode
            if (docData.length() > 0 && docData.charAt(0) == 65279)
                docData = docData.substring(1);

            doc = parser.parseInput(docData, baseUri);
            doc.outputSettings().charset(charsetName);
        }
        return doc;
    }

    /**
 
View Full Code Here

Examples of org.jsoup.nodes.Document.outputSettings()

        public Document parse() throws IOException {
            Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before parsing response");
            Document doc = DataUtil.parseByteData(byteData, charset, url.toExternalForm(), req.parser());
            byteData.rewind();
            charset = doc.outputSettings().charset().name(); // update charset from meta-equiv, possibly
            return doc;
        }

        public String body() {
            Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body");
View Full Code Here

Examples of org.jsoup.nodes.Document.outputSettings()

        String style = e.attr("style");
        String escaped = escapeImgCss(style);
        e.attr("style", escaped);
      }

      clean.outputSettings(new OutputSettings().escapeMode(EscapeMode.base).prettyPrint(false));
      Element body = clean.body();
      if (keepTextOnly) {
        content = body.text();
      } else {
        content = body.html();
View Full Code Here

Examples of org.jsoup.nodes.Document.outputSettings()

            charsetName = defaultCharset;
            doc = null;
        }
        if (doc == null) {
            doc = parser.parseInput(docData, baseUri);
            doc.outputSettings().charset(charsetName);
        }
        return doc;
    }

    /**
 
View Full Code Here

Examples of org.jsoup.nodes.Document.outputSettings()

    @Test public void discardsSpuriousByteOrderMarkWhenNoCharsetSet() {
        String html = "\uFEFF<html><head><title>One</title></head><body>Two</body></html>";
        ByteBuffer buffer = Charset.forName("UTF-8").encode(html);
        Document doc = DataUtil.parseByteData(buffer, null, "http://foo.com/", Parser.htmlParser());
        assertEquals("One", doc.head().text());
        assertEquals("UTF-8", doc.outputSettings().charset().displayName());
    }

    @Test
    public void shouldNotThrowExceptionOnEmptyCharset() {
        assertEquals(null, DataUtil.getCharsetFromContentType("text/html; charset="));
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.