package org.jsoup.helper;
import static org.junit.Assert.assertEquals;
import org.jsoup.nodes.Document;
import org.jsoup.parser.Parser;
import org.junit.Test;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
/**
 * Tests for {@link DataUtil}: charset extraction from Content-Type header values and
 * parsing of byte data that begins with a byte order mark.
 */
public class DataUtilTest {

    /**
     * Shorthand for {@link DataUtil#getCharsetFromContentType(String)} so the assertions
     * below read as "expected charset, header value".
     *
     * @param contentType the Content-Type header value to inspect; may be {@code null}
     * @return whatever {@code DataUtil.getCharsetFromContentType} returns for the value
     */
    private static String charsetOf(String contentType) {
        return DataUtil.getCharsetFromContentType(contentType);
    }

    /**
     * Unquoted charset parameters: the test expects the charset to come back with the
     * case used in the header (including when the header has a trailing space), and
     * expects {@code null} when there is no charset parameter, when the header itself
     * is {@code null}, or when the charset is named "Unknown".
     */
    @Test
    public void testCharset() {
        // Charset present: returned with the same case as in the header.
        assertEquals("utf-8", charsetOf("text/html;charset=utf-8 "));
        assertEquals("UTF-8", charsetOf("text/html; charset=UTF-8"));
        assertEquals("ISO-8859-1", charsetOf("text/html; charset=ISO-8859-1"));

        // No usable charset: null is expected.
        assertEquals(null, charsetOf("text/html"));
        assertEquals(null, charsetOf(null));
        assertEquals(null, charsetOf("text/html;charset=Unknown"));
    }

    /**
     * Double-quoted charset parameters: the test expects the quotes to be dropped from
     * the returned name, and expects {@code null} for the quoted name "Unsupported".
     */
    @Test
    public void testQuotedCharset() {
        assertEquals("utf-8", charsetOf("text/html; charset=\"utf-8\""));
        assertEquals("UTF-8", charsetOf("text/html;charset=\"UTF-8\""));
        assertEquals("ISO-8859-1", charsetOf("text/html; charset=\"ISO-8859-1\""));
        assertEquals(null, charsetOf("text/html; charset=\"Unsupported\""));
    }

    /**
     * Encodes a document that starts with U+FEFF as UTF-8, parses the bytes with the
     * HTML parser, and expects the head text to be exactly "One" despite the leading mark.
     */
    @Test
    public void discardsSpuriousByteOrderMark() {
        String markup = "\uFEFF<html><head><title>One</title></head><body>Two</body></html>";
        Charset utf8 = Charset.forName("UTF-8");
        ByteBuffer encoded = utf8.encode(markup);

        Parser htmlParser = Parser.htmlParser();
        Document parsed = DataUtil.parseByteData(encoded, "UTF-8", "http://foo.com/", htmlParser);

        String headText = parsed.head().text();
        assertEquals("One", headText);
    }
}