package org.jsoup.parser;
import org.jsoup.Jsoup;
import org.jsoup.TextUtil;
import org.jsoup.helper.StringUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.junit.Ignore;
import org.junit.Test;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URISyntaxException;
import java.util.List;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotSame;
/**
 * Tests XmlTreeBuilder: parsing XML input directly through the tree builder, through
 * {@code Jsoup.parse} with {@code Parser.xmlParser()}, and through
 * {@code Parser.parseXmlFragment}.
 *
 * @author Jonathan Hedley
 */
public class XmlTreeBuilderTest {
    /** Base URI handed to the parser in the tests that parse directly from a string. */
    private static final String BASE_URI = "http://foo.com/";

    /**
     * Parses a small document and checks the serialised output (unquoted and single-quoted
     * attribute values come back double-quoted) and that a relative {@code href} resolves
     * against the base URI.
     */
    @Test
    public void testSimpleXmlParse() {
        String xml = "<doc id=2 href='/bar'>Foo <br /><link>One</link><link>Two</link></doc>";
        XmlTreeBuilder tb = new XmlTreeBuilder();
        Document doc = tb.parse(xml, BASE_URI);
        assertEquals("<doc id=\"2\" href=\"/bar\">Foo <br /><link>One</link><link>Two</link></doc>",
                TextUtil.stripNewlines(doc.html()));
        // JUnit's contract is assertEquals(expected, actual); keep that order so a failure
        // message reports the right value as "expected".
        assertEquals("http://foo.com/bar", doc.getElementById("2").absUrl("href"));
    }

    /**
     * Checks handling of mismatched end tags: {@code </val>} closes the inner {@code val}
     * (the one holding "Two"), and the unmatched {@code </bar>} is ignored, so "Three" ends up
     * inside the outer {@code val}.
     */
    @Test
    public void testPopToClose() {
        String xml = "<doc><val>One<val>Two</val></bar>Three</doc>";
        XmlTreeBuilder tb = new XmlTreeBuilder();
        Document doc = tb.parse(xml, BASE_URI);
        assertEquals("<doc><val>One<val>Two</val>Three</val></doc>",
                TextUtil.stripNewlines(doc.html()));
    }

    /** Checks that a doctype, a comment, text and an empty element round-trip unchanged. */
    @Test
    public void testCommentAndDocType() {
        String xml = "<!DOCTYPE html><!-- a comment -->One <qux />Two";
        XmlTreeBuilder tb = new XmlTreeBuilder();
        Document doc = tb.parse(xml, BASE_URI);
        assertEquals("<!DOCTYPE html><!-- a comment -->One <qux />Two",
                TextUtil.stripNewlines(doc.html()));
    }

    /** Same input as {@link #testPopToClose()}, but parsed via {@code Jsoup.parse} with the XML parser. */
    @Test
    public void testSupplyParserToJsoupClass() {
        String xml = "<doc><val>One<val>Two</val></bar>Three</doc>";
        Document doc = Jsoup.parse(xml, BASE_URI, Parser.xmlParser());
        assertEquals("<doc><val>One<val>Two</val>Three</val></doc>",
                TextUtil.stripNewlines(doc.html()));
    }

    /**
     * Fetches the same URL with the XML parser and with the default parser and checks that the
     * results differ. Ignored; NOTE(review): presumably because it needs network access to a
     * remote host -- confirm before re-enabling.
     */
    @Ignore
    @Test
    public void testSupplyParserToConnection() throws IOException {
        String xmlUrl = "http://direct.infohound.net/tools/jsoup-xml-test.xml";

        // parse with both xml and html parser, ensure different
        Document xmlDoc = Jsoup.connect(xmlUrl).parser(Parser.xmlParser()).get();
        Document htmlDoc = Jsoup.connect(xmlUrl).get();

        assertEquals("<doc><val>One<val>Two</val>Three</val></doc>",
                TextUtil.stripNewlines(xmlDoc.html()));
        assertNotSame(htmlDoc, xmlDoc);
        assertEquals(1, htmlDoc.select("head").size()); // html parser normalises
        assertEquals(0, xmlDoc.select("head").size()); // xml parser does not
    }

    /** Parses the {@code /htmltests/xml-test.xml} resource from an input stream with the XML parser. */
    @Test
    public void testSupplyParserToDataStream() throws IOException, URISyntaxException {
        File xmlFile = new File(XmlTreeBuilder.class.getResource("/htmltests/xml-test.xml").toURI());
        InputStream inStream = new FileInputStream(xmlFile);
        try {
            Document doc = Jsoup.parse(inStream, null, "http://foo.com", Parser.xmlParser());
            assertEquals("<doc><val>One<val>Two</val>Three</val></doc>",
                    TextUtil.stripNewlines(doc.html()));
        } finally {
            // The test opened the stream, so it closes it -- including when the assertion fails.
            inStream.close();
        }
    }

    /** Contrasts how the default parser and the XML parser treat {@code <br>one</br>}. */
    @Test
    public void testDoesNotForceSelfClosingKnownTags() {
        // The default (html) parse turns "<br>one</br>" into "<br />one\n<br />". The XML parse
        // should leave it as "<br>one</br>" -- it does not recognise the tag as a known one.
        Document htmlDoc = Jsoup.parse("<br>one</br>");
        assertEquals("<br />one\n<br />", htmlDoc.body().html());

        Document xmlDoc = Jsoup.parse("<br>one</br>", "", Parser.xmlParser());
        assertEquals("<br>one</br>", xmlDoc.html());
    }

    /**
     * Checks that a leading {@code <?xml ... ?>} is kept as a node named {@code #declaration}
     * and that the trailing comment is the document's third child node.
     */
    @Test
    public void handlesXmlDeclarationAsDeclaration() {
        String xml = "<?xml encoding='UTF-8' ?><body>One</body><!-- comment -->";
        Document doc = Jsoup.parse(xml, "", Parser.xmlParser());
        assertEquals("<?xml encoding='UTF-8' ?> <body> One </body> <!-- comment -->",
                StringUtil.normaliseWhitespace(doc.outerHtml()));
        assertEquals("#declaration", doc.childNode(0).nodeName());
        assertEquals("#comment", doc.childNode(2).nodeName());
    }

    /**
     * Parses a fragment into a node list and checks the node count, the first node's name and
     * resolved {@code src}, and the text of the second node.
     */
    @Test
    public void xmlFragment() {
        String xml = "<one src='/foo/' />Two<three><four /></three>";
        List<Node> nodes = Parser.parseXmlFragment(xml, "http://example.com/");
        assertEquals(3, nodes.size());

        Node first = nodes.get(0);
        assertEquals("http://example.com/foo/", first.absUrl("src"));
        assertEquals("one", first.nodeName());
        assertEquals("Two", ((TextNode) nodes.get(1)).text());
    }
}