package jp.aonir.fuzzyxml;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import jp.aonir.fuzzyxml.event.FuzzyXMLErrorEvent;
import jp.aonir.fuzzyxml.event.FuzzyXMLErrorListener;
import jp.aonir.fuzzyxml.internal.FuzzyXMLAttributeImpl;
import jp.aonir.fuzzyxml.internal.FuzzyXMLCDATAImpl;
import jp.aonir.fuzzyxml.internal.FuzzyXMLCommentImpl;
import jp.aonir.fuzzyxml.internal.FuzzyXMLDocTypeImpl;
import jp.aonir.fuzzyxml.internal.FuzzyXMLDocumentImpl;
import jp.aonir.fuzzyxml.internal.FuzzyXMLElementImpl;
import jp.aonir.fuzzyxml.internal.FuzzyXMLPreImpl;
import jp.aonir.fuzzyxml.internal.FuzzyXMLProcessingInstructionImpl;
import jp.aonir.fuzzyxml.internal.FuzzyXMLScriptImpl;
import jp.aonir.fuzzyxml.internal.FuzzyXMLStyleImpl;
import jp.aonir.fuzzyxml.internal.FuzzyXMLTextImpl;
import jp.aonir.fuzzyxml.internal.FuzzyXMLUtil;
import jp.aonir.fuzzyxml.internal.RenderContext;
import jp.aonir.fuzzyxml.resources.Messages;
import org.objectstyle.wolips.wodclipse.core.util.WodHtmlUtils;
public class FuzzyXMLParser {
// Nodes that are currently open while parsing: start-tag elements, plus the
// comment/CDATA nodes that are pushed while their contents are scanned.
private Stack<FuzzyXMLNode> _stack = new Stack<FuzzyXMLNode>();
// The unmodified text handed to parse(String); node offsets index into it.
private String _originalSource;
// Nodes that have no parent, collected in the order they are completed.
private List<FuzzyXMLNode> _roots;
// The first DOCTYPE encountered (later ones are ignored by handleDoctype).
private FuzzyXMLDocType _docType;
// Listeners notified by fireErrorEvent.
private List<FuzzyXMLErrorListener> _listeners = new ArrayList<FuzzyXMLErrorListener>();
// Elements whose start tag has been seen but which have not been closed yet.
private List<FuzzyXMLElement> _nonCloseElements = new ArrayList<FuzzyXMLElement>();
// Namespaces (e.g. "wo") whose bare name may close any element in that namespace.
private List<String> _looseNamespaces = new ArrayList<String>();
// Lower-case tag names that are closed automatically before the next node.
private List<String> _autocloseTags = new ArrayList<String>();
// Lower-case tag names that may be left unclosed without an error.
private List<String> _looseTags = new ArrayList<String>();
// When true, no autoclose/loose tags are registered and attribute values are
// checked for unescaped characters.
private boolean _wellFormedRequired = false;
// Passed to FuzzyXMLUtil.decode and to the resulting document's setHTML.
private boolean _isHTML = false;
// Regular expressions used while parsing.
// Candidate tag: a '<' followed by anything up to the next '<' or '>'.
private Pattern _tag = Pattern.compile("<((|/)([^<>]*))([^<]?|>)");
// private Pattern attr =
// Pattern.compile("([\\w:]+?)\\s*=(\"|')([^\"]*?)\\2");
// Pieces of a DOCTYPE declaration: name, PUBLIC ids, SYSTEM id, internal subset.
private Pattern _docTypeName = Pattern.compile("^<!DOCTYPE[ \r\n\t]+([\\w\\-_]*)");
private Pattern _docTypePublic = Pattern.compile("PUBLIC[ \r\n\t]+\"([^\"]*)\"[ \r\n\t]*\"*([^\">]*)\"*");
private Pattern _docTypeSystem = Pattern.compile("SYSTEM[ \r\n\t]+\"([^\"]*)\"");
private Pattern _docTypeSubset = Pattern.compile("\\[([^\\]]*)\\]>");
// Characters that checkAttributeValue reports as needing to be escaped.
private Pattern _invalidStringPattern = Pattern.compile("([<>&])");
// Closing pre tag, matched case-insensitively with optional inner whitespace.
private Pattern _preCloseTagPattern = Pattern.compile("<\\s*/\\s*PRE\\s*>", Pattern.CASE_INSENSITIVE);
/**
 * Creates a parser whose documents are not flagged as HTML; delegates to
 * the two-argument constructor with {@code isHTML = false}.
 *
 * @param wellFormedRequired
 *          when false, the default autoclose and loose tags are registered
 */
public FuzzyXMLParser(boolean wellFormedRequired) {
this(wellFormedRequired, false);
}
public FuzzyXMLParser(boolean wellFormedRequired, boolean isHTML) {
super();
_wellFormedRequired = wellFormedRequired;
_roots = new LinkedList<FuzzyXMLNode>();
_isHTML = isHTML;
// MS: Hardcoded that "wo" is a loose namespace
addLooseNamespace("wo");
addLooseNamespace("webobject");
addLooseNamespace("webobjects");
if (!_wellFormedRequired) {
addAutocloseTag("img");
addAutocloseTag("br");
addAutocloseTag("hr");
addAutocloseTag("meta");
addAutocloseTag("link");
addAutocloseTag("input");
addAutocloseTag("spacer");
addAutocloseTag("frame");
addAutocloseTag("basefont");
addAutocloseTag("base");
addAutocloseTag("area");
addAutocloseTag("col");
addAutocloseTag("isindex");
addAutocloseTag("param");
addLooseTag("p");
addLooseTag("li");
}
}
/**
 * An autoclose tag is like br or link: it commonly has no closing tag and
 * it never has contents. The tag is also registered as a loose tag.
 *
 * @param autocloseTag
 *          the name of the tag to treat as autoclosing
 */
public void addAutocloseTag(String autocloseTag) {
_autocloseTags.add(autocloseTag);
addLooseTag(autocloseTag);
}
/**
 * A "loose" tag is like li or p: people often do not close it properly,
 * but it may have content.
 *
 * @param looseTag
 *          the name of the tag to make loose
 */
public void addLooseTag(String looseTag) {
_looseTags.add(looseTag);
}
/**
 * A "loose" namespace is like the wo: namespace. A wo:if element does not
 * need a matching wo:if close tag; a plain wo close tag is enough.
 *
 * @param namespace
 *          the name of the namespace to make loose
 */
public void addLooseNamespace(String namespace) {
_looseNamespaces.add(namespace);
}
/**
 * Adds a listener for error handling.
 *
 * @param listener
 *          the listener to notify of parse errors
 */
public void addErrorListener(FuzzyXMLErrorListener listener) {
_listeners.add(listener);
}
/**
 * Builds one error event and delivers it to every registered listener.
 *
 * @param offset
 *          offset of the problem
 * @param length
 *          length of the problematic range
 * @param message
 *          description of the problem
 * @param node
 *          the node the problem relates to, or null
 */
private void fireErrorEvent(int offset, int length, String message, FuzzyXMLNode node) {
  final FuzzyXMLErrorEvent errorEvent = new FuzzyXMLErrorEvent(offset, length, message, node);
  for (FuzzyXMLErrorListener registered : _listeners) {
    registered.error(errorEvent);
  }
}
/**
 * Parses an XML document from an input stream. The character encoding is
 * taken from {@code FuzzyXMLUtil.getEncoding}; when that returns null the
 * platform default charset is used.
 *
 * @param in
 *          the input stream to read
 * @return the parsed document
 * @throws IOException
 *           if the stream cannot be read or the encoding is unsupported
 */
public FuzzyXMLDocument parse(InputStream in) throws IOException {
  byte[] rawContent = FuzzyXMLUtil.readStream(in);
  String charsetName = FuzzyXMLUtil.getEncoding(rawContent);
  String decoded = (charsetName != null) ? new String(rawContent, charsetName) : new String(rawContent);
  return parse(decoded);
}
/**
 * Parses an XML document from a file. The character encoding is taken from
 * {@code FuzzyXMLUtil.getEncoding}; when that returns null the platform
 * default charset is used.
 *
 * @param file
 *          the file to read
 * @return the parsed document
 * @throws IOException
 *           if the file cannot be opened or read, or the encoding is
 *           unsupported
 */
public FuzzyXMLDocument parse(File file) throws IOException {
  byte[] bytes;
  InputStream in = new FileInputStream(file);
  try {
    bytes = FuzzyXMLUtil.readStream(in);
  }
  finally {
    // Always release the file handle, even when reading fails.
    in.close();
  }
  String encode = FuzzyXMLUtil.getEncoding(bytes);
  if (encode == null) {
    return parse(new String(bytes));
  }
  return parse(new String(bytes, encode));
}
/**
 * Scans {@code source} for tag-like matches and dispatches each one to the
 * matching handler. Text found between two matches is emitted through
 * {@code handleText}.
 *
 * @param source
 *          the text to scan; match positions are shifted by
 *          {@code initialOffset} before being used against the original
 *          source
 * @param initialOffset
 *          offset of {@code source} within the original source
 * @param woOnly
 *          when true, only close/empty/start tags accepted by
 *          {@code WodHtmlUtils.isWOTag} are handled; everything else is
 *          skipped
 * @param parseAsSynthetic
 *          passed on to the elements created for empty and start tags
 * @return the end offset of the last match processed, or
 *         {@code initialOffset - 1} when nothing matched
 */
protected int _parse(String source, int initialOffset, boolean woOnly, boolean parseAsSynthetic) {
  Matcher matcher = _tag.matcher(source);
  int lastIndex = initialOffset - 1;
  while (matcher.find()) {
    int start = matcher.start() + initialOffset;
    int end = matcher.end() + initialOffset;
    // Emit the text that precedes this tag.
    if (lastIndex == -1 && start > 0) {
      handleText(0, start, true);
    }
    else if (lastIndex != (initialOffset - 1) && lastIndex < start) {
      handleText(lastIndex, start, true);
    }
    String originalText = matcher.group(1);
    String text = originalText.trim();
    if (!woOnly && text.startsWith("%")) {
      // Scriptlet-style tag: kept as unescaped text.
      handleText(start, end, false);
    }
    else if (!woOnly && text.startsWith("?")) {
      handleDeclaration(start, end);
    }
    else if (!woOnly && (text.startsWith("!DOCTYPE") || text.startsWith("!doctype"))) {
      handleDoctype(start, end, text);
    }
    else if (!woOnly && text.startsWith("![CDATA[")) {
      handleCDATA(start, end, _originalSource.substring(start, end));
    }
    else if (!woOnly && (text.equalsIgnoreCase("pre") || text.toLowerCase().startsWith("pre "))) {
      // The pre handler consumes the block itself; resume where it stopped.
      end = handlePreTag(start, end);
      matcher.region(end, source.length());
    }
    else if (text.startsWith("/") && (!woOnly || WodHtmlUtils.isWOTag(text.substring(1)))) {
      handleCloseTag(start, end, text);
    }
    else if (text.endsWith("/") && (!woOnly || WodHtmlUtils.isWOTag(text))) {
      if (originalText.endsWith(" ")) {
        fireErrorEvent(start, end - start, "You can not have a space between the / and the > in your webobject tags.", null);
      }
      handleEmptyTag(start, end, parseAsSynthetic);
    }
    else if (!woOnly && text.startsWith("!--")) {
      end = _originalSource.indexOf("-->", start);
      if (end < 0) {
        // Unterminated comment: treat it as running to the end of the
        // source instead of failing in substring(start, -1).
        end = _originalSource.length();
      }
      else {
        end += 3;
      }
      handleComment(start, end, _originalSource.substring(start, end));
      // Clamp so a comment end past the scanned text cannot make region() throw.
      matcher.region(Math.min(end, source.length()), source.length());
    }
    else if (!woOnly || WodHtmlUtils.isWOTag(text)) {
      handleStartTag(start, end, parseAsSynthetic);
    }
    lastIndex = end;
  }
  return lastIndex;
}
/**
 * Parses the XML source passed as an argument and returns a
 * FuzzyXMLDocument object.
 * <p>
 * NOTE(review): the stack, root list, open-element list and doctype are not
 * reset here, so reusing one parser instance for several sources appears to
 * accumulate state -- confirm before reusing instances.
 *
 * @param source
 *          the XML source
 * @return the FuzzyXMLDocument holding the parse result
 */
public FuzzyXMLDocument parse(String source) {
// Keep the original source; node offsets and text are taken from it.
_originalSource = source;
// Preprocess a working copy (comments, scripts, scriptlets, CDATA, DOCTYPE,
// processing instructions, strings) before scanning it for tags.
source = FuzzyXMLUtil.comment2space(source, true);
source = FuzzyXMLUtil.escapeScript(source);
source = FuzzyXMLUtil.scriptlet2space(source, true);
source = FuzzyXMLUtil.cdata2space(source, true);
source = FuzzyXMLUtil.doctype2space(source, true);
source = FuzzyXMLUtil.processing2space(source, true);
source = FuzzyXMLUtil.escapeString(source);
int lastIndex = _parse(source, 0, false, false);
// Elements are still open at the end of the input.
if (_stack.size() > 0 && _nonCloseElements.size() > 0) {
FuzzyXMLElementImpl lastElement = (FuzzyXMLElementImpl) _nonCloseElements.get(_nonCloseElements.size() - 1);
String lowercaseLastElementName = lastElement.getName().toLowerCase();
// Only the most recently opened element is reported, and only if it is not a loose tag.
if (!_looseTags.contains(lowercaseLastElementName)) {
fireErrorEvent(lastElement.getOffset(), lastElement.getLength(), Messages.getMessage("error.noCloseTag", lastElement.getName()), null);
}
// Extend every open element up to the last parsed tag and attach it to
// its parent, or to the roots when it has none.
for (FuzzyXMLNode openNode : _stack) {
if (openNode instanceof FuzzyXMLElementImpl) {
FuzzyXMLElementImpl openElement = (FuzzyXMLElementImpl) openNode;
openElement.setLength(lastIndex - openElement.getOffset());
if (openElement.getParentNode() == null) {
_roots.add(openElement);
}
else {
((FuzzyXMLElementImpl) openElement.getParentNode()).appendChildWithNoCheck(openElement);
}
}
}
}
// MS: Capture trailing text that isn't inside of a tag at all
if (lastIndex != source.length()) {
handleText(Math.max(0, lastIndex), source.length(), true);
}
// Wrap all roots in a synthetic "document" element.
FuzzyXMLElement docElement = null;
if (_roots.size() == 0) {
docElement = new FuzzyXMLElementImpl(null, "document", 0, _originalSource.length(), 0);
// docElement.appendChild(root);
}
else {
FuzzyXMLNode firstRoot = _roots.get(0);
FuzzyXMLNode lastRoot = _roots.get(_roots.size() - 1);
// The document element spans from the first root to the end of the last root.
docElement = new FuzzyXMLElementImpl(null, "document", firstRoot.getOffset(), lastRoot.getOffset() + lastRoot.getLength() - firstRoot.getOffset(), 0);
for (FuzzyXMLNode root : _roots) {
((FuzzyXMLElementImpl) docElement).appendChildWithNoCheck(root);
}
}
FuzzyXMLDocumentImpl doc = new FuzzyXMLDocumentImpl(docElement, _docType);
doc.setHTML(_isHTML);
return doc;
}
/**
 * Processes a CDATA node: strips the CDATA delimiters, attaches the node to
 * the current parent (or the roots) and scans its body for WO tags.
 */
private void handleCDATA(int offset, int end, String text) {
  closeAutocloseTags();
  String body = text.replaceFirst("<!\\[CDATA\\[", "").replaceFirst("\\]\\]>", "");
  FuzzyXMLNode owner = getParent();
  FuzzyXMLCDATAImpl cdata = new FuzzyXMLCDATAImpl(owner, body, offset, end - offset);
  if (owner == null) {
    _roots.add(cdata);
  }
  else {
    ((FuzzyXMLElement) owner).appendChild(cdata);
  }
  // The CDATA node acts as the parent while its body is scanned.
  _stack.push(cdata);
  _parse(body, offset + "<![CDATA[".length(), true, true);
  FuzzyXMLNode top = _stack.pop();
  if (top != cdata) {
    // Something else is on top; put it back untouched.
    _stack.push(top);
  }
}
/**
 * Handles a {@code pre} start tag: opens a pre element and scans the block
 * up to the closing pre tag for WO tags only.
 * <p>
 * When the closing tag is missing, the block is taken to run to the end of
 * the source instead of reading one character past it.
 *
 * @param offset
 *          offset of the opening tag's {@code <}
 * @param end
 *          offset just past the opening tag
 * @return the offset at which the caller should resume scanning: the start
 *         of the closing pre tag when there is one, otherwise the end of
 *         the last tag seen inside the block
 */
private int handlePreTag(int offset, int end) {
  closeAutocloseTags();
  String[] content = _preCloseTagPattern.split(_originalSource.substring(end, _originalSource.length()), 2);
  String text = content[0];
  // With a limit of 2, a second piece exists only when a closing tag matched.
  boolean terminated = content.length > 1;
  TagInfo info = parseTagContents(_originalSource.substring(offset + 1, end - 1));
  FuzzyXMLPreImpl preNode = new FuzzyXMLPreImpl(getParent(), text, offset, text.length());
  handleStartTag(preNode, info, offset, end);
  // For a terminated block, include the '<' of the closing tag so the
  // scan ends with a match located exactly at the closing tag.
  int blockEnd = terminated ? end + text.length() + 1 : _originalSource.length();
  String preBlock = _originalSource.substring(offset, blockEnd);
  int lastIndex = _parse(preBlock, offset, true, false);
  // Step back onto the closing tag's '<' so the caller parses it as a close tag.
  return terminated ? lastIndex - 1 : lastIndex;
}
/**
 * Processes a text node: creates it from the original source range and
 * attaches it to the current parent, or to the roots when nothing is open.
 */
private void handleText(int offset, int end, boolean escape) {
  String rawText = _originalSource.substring(offset, end);
  closeAutocloseTags();
  FuzzyXMLNode owner = getParent();
  FuzzyXMLTextImpl textNode = new FuzzyXMLTextImpl(owner, FuzzyXMLUtil.decode(rawText, _isHTML), offset, end - offset);
  textNode.setEscape(escape);
  if (owner == null) {
    _roots.add(textNode);
    return;
  }
  ((FuzzyXMLElement) owner).appendChild(textNode);
}
/**
 * Processes an XML declaration (processing instruction): the first
 * whitespace-delimited word is the name, the remainder is the data.
 */
private void handleDeclaration(int offset, int end) {
  closeAutocloseTags();
  String body = _originalSource.substring(offset, end).replaceFirst("^<\\?", "").replaceFirst("\\?>$", "").trim();
  String name = body.split("[ \r\n\t]+")[0];
  String data = body.substring(name.length()).trim();
  FuzzyXMLProcessingInstructionImpl pi = new FuzzyXMLProcessingInstructionImpl(null, name, data, offset, end - offset);
  FuzzyXMLNode owner = getParent();
  if (owner == null) {
    _roots.add(pi);
  }
  else {
    ((FuzzyXMLElement) owner).appendChild(pi);
  }
  // XML should not have autoclosing tags
  if (name.startsWith("xml")) {
    _autocloseTags.clear();
  }
}
/**
 * Processes a DOCTYPE declaration. Only the first one is recorded; the
 * name, public id, system id and internal subset are extracted from the
 * original source range.
 */
private void handleDoctype(int offset, int end, String text) {
  closeAutocloseTags();
  if (_docType != null) {
    return;
  }
  String declaration = _originalSource.substring(offset, end);

  String name = "";
  Matcher nameMatcher = _docTypeName.matcher(declaration);
  if (nameMatcher.find()) {
    name = nameMatcher.group(1);
  }

  String publicId = "";
  String systemId = "";
  Matcher publicMatcher = _docTypePublic.matcher(declaration);
  if (publicMatcher.find()) {
    publicId = publicMatcher.group(1);
    systemId = publicMatcher.group(2);
  }
  else {
    // No PUBLIC clause: fall back to a SYSTEM identifier.
    Matcher systemMatcher = _docTypeSystem.matcher(declaration);
    if (systemMatcher.find()) {
      systemId = systemMatcher.group(1);
    }
  }

  String internalSubset = "";
  Matcher subsetMatcher = _docTypeSubset.matcher(declaration);
  if (subsetMatcher.find()) {
    internalSubset = subsetMatcher.group(1);
  }

  _docType = new FuzzyXMLDocTypeImpl(null, name, publicId, systemId, internalSubset, offset, end - offset);
}
/**
 * Closes the innermost open element if it is an autoclose tag or may not
 * have children, using a zero-length close tag placed right after its
 * opening tag.
 */
private void closeAutocloseTags() {
  if (_stack.isEmpty()) {
    return;
  }
  FuzzyXMLElementImpl innermost = (FuzzyXMLElementImpl) _stack.peek();
  String lowerName = innermost.getName().toLowerCase();
  boolean mustClose = _autocloseTags.contains(lowerName) || innermost.isForbiddenFromHavingChildren();
  if (mustClose) {
    int openTagEnd = innermost.getOffset() + innermost.getOpenTagLength();
    handleCloseTag(openTagEnd, openTagEnd, "/" + lowerName, false);
  }
}
/**
 * Processes a close tag, reporting mismatch errors to the listeners.
 * Delegates to the four-argument overload with {@code showMismatchError}
 * set to true.
 */
private void handleCloseTag(int offset, int end, String text) {
handleCloseTag(offset, end, text, true);
}
/**
 * Processes a close tag against the innermost open element.
 * <p>
 * If the names match (case-insensitively) the element is closed. Otherwise
 * the mismatch is resolved in one of these ways: a loose namespace name
 * closes the element of that namespace; loose elements are closed
 * implicitly until a matching or non-loose element is reached; or, when an
 * earlier open element with the close tag's name exists, the innermost
 * element is force-closed. When no open element has that name, the close
 * tag is ignored.
 *
 * @param offset
 *          offset of the close tag
 * @param end
 *          offset just past the close tag
 * @param text
 *          the tag contents, starting with "/"
 * @param showMismatchError
 *          whether mismatch problems are reported to the listeners
 */
private void handleCloseTag(int offset, int end, String text, boolean showMismatchError) {
// Nothing is open, so there is nothing to close.
if (_stack.size() == 0) {
return;
}
String tagName = text.substring(1).trim();
// MS: Chuck does close tags like </webobject closing something else>
int chuckIndex = tagName.indexOf(' ');
if (chuckIndex != -1) {
String chuckWord = tagName.substring(0, chuckIndex);
if (WodHtmlUtils.isWOTag(chuckWord)) {
tagName = chuckWord;
}
}
FuzzyXMLElementImpl lastOpenElement = (FuzzyXMLElementImpl) _stack.pop();
String lowercaseLastOpenElementName = lastOpenElement.getName().toLowerCase();
String lowercaseCloseTagName = tagName.toLowerCase();
boolean closeTagMatches = lowercaseLastOpenElementName.equals(lowercaseCloseTagName);
if (!closeTagMatches) {
closeAutocloseTags();
// Allow </wo> to close </wo:if>
boolean looseNamespace = false;
int colonIndex = lowercaseLastOpenElementName.indexOf(':');
if (colonIndex != -1) {
String elementNamespace = lowercaseLastOpenElementName.substring(0, colonIndex);
if (lowercaseCloseTagName.equals(elementNamespace) && _looseNamespaces.contains(elementNamespace)) {
tagName = lastOpenElement.getName();
lowercaseCloseTagName = lowercaseLastOpenElementName;
looseNamespace = true;
}
}
if (!looseNamespace) {
boolean looseTag = false;
if (_looseTags.contains(lowercaseLastOpenElementName)) {
looseTag = true;
}
if (looseTag) {
// Implicitly close loose elements, innermost first, until the close tag's
// element, a non-loose element, or the bottom of the stack is reached.
while (lowercaseLastOpenElementName != null && !lowercaseLastOpenElementName.equals(lowercaseCloseTagName) && _looseTags.contains(lowercaseLastOpenElementName)) {
int lastOpenElementEndOffset = end;
// Put the element back so the recursive call closes it without reporting errors.
_stack.push(lastOpenElement);
handleCloseTag(lastOpenElementEndOffset, lastOpenElementEndOffset, "/" + lastOpenElement.getName(), false);
if (_stack.size() == 0) {
lastOpenElement = null;
lowercaseLastOpenElementName = null;
}
else {
lastOpenElement = (FuzzyXMLElementImpl) _stack.pop();
lowercaseLastOpenElementName = lastOpenElement.getName().toLowerCase();
}
}
}
else {
// Look for an open element with the close tag's name (the last match wins).
FuzzyXMLElement matchingOpenElement = null;
for (FuzzyXMLElement nonCloseElement : _nonCloseElements) {
if (nonCloseElement.getName().equalsIgnoreCase(lowercaseCloseTagName)) {
matchingOpenElement = nonCloseElement;
}
}
if (matchingOpenElement == null) {
// Stray close tag: report it and leave the stack as it was.
if (showMismatchError) {
fireErrorEvent(offset, end - offset, Messages.getMessage("error.noStartTag", tagName), null);
}
_stack.push(lastOpenElement);
return;
}
if (showMismatchError) {
fireErrorEvent(lastOpenElement.getOffset(), lastOpenElement.getLength(), "Missing </" + lastOpenElement.getName() + "> tag", null);
}
// Force-close the innermost element with a zero-length close tag, then
// continue with the element that is now on top of the stack.
_stack.push(lastOpenElement);
handleCloseTag(offset, offset, "/" + lastOpenElement.getName(), false);
lastOpenElement = (FuzzyXMLElementImpl) _stack.pop();
lowercaseLastOpenElementName = lastOpenElement.getName().toLowerCase();
}
}
}
if (lastOpenElement != null) {
// Originally: for an empty tag, add an empty text node (now disabled).
if (lastOpenElement.getChildren().length == 0) {
// MS: Hopefully this doesn't break things ... Sure wish I could read
// Japanese to know what the original author said about this :)
// lastOpenElement.appendChild(new FuzzyXMLTextImpl(getParent(), "",
// offset, 0));
}
lastOpenElement.setLength(end - lastOpenElement.getOffset());
// Close-tag position details are recorded only for a literal name match.
if (closeTagMatches) {
lastOpenElement.setCloseTagOffset(offset);
lastOpenElement.setCloseTagLength(end - offset - 2);
lastOpenElement.setCloseNameOffset(text.indexOf(tagName));
}
_nonCloseElements.remove(lastOpenElement);
if (lastOpenElement.getParentNode() == null) {
_roots.add(lastOpenElement);
// A root element was closed: report every element that is still open.
for (FuzzyXMLElement error : _nonCloseElements) {
if (showMismatchError) {
fireErrorEvent(error.getOffset(), error.getLength(), Messages.getMessage("error.noCloseTag", error.getName()), error);
}
}
}
else {
((FuzzyXMLElementImpl) lastOpenElement.getParentNode()).appendChildWithNoCheck(lastOpenElement);
}
}
}
/**
 * Reports every {@code <}, {@code >} or {@code &} that remains in an
 * attribute's raw value after nested tags (if any) and entity references
 * have been removed.
 */
private void checkAttributeValue(FuzzyXMLAttribute attr) {
  String rawValue = attr.getRawValue();
  if (rawValue == null) {
    return;
  }
  // MS: Don't consider nested tags for escaping ...
  String candidate = attr.hasNestedTag() ? rawValue.replaceAll("<[^>]*>", "") : rawValue;
  candidate = candidate.replaceAll("&[^&; \"]+;", " ");
  Matcher offenders = _invalidStringPattern.matcher(candidate);
  while (offenders.find()) {
    String invalidPart = offenders.group();
    fireErrorEvent(attr.getParentNode().getOffset() + attr.getValueDataOffset() + 1, attr.getValueDataLength(), "The character '" + invalidPart + "' must be escaped.", attr);
  }
}
/**
 * Processes an empty (self-closing) tag: creates the element, attaches it
 * to the current parent or the roots, adds its attributes and checks them.
 */
private void handleEmptyTag(int offset, int end, boolean synthetic) {
  closeAutocloseTags();
  TagInfo info = parseTagContents(_originalSource.substring(offset + 1, end - 1));
  FuzzyXMLNode owner = getParent();
  FuzzyXMLElementImpl element = new FuzzyXMLElementImpl(owner, info.name, offset, end - offset, info.nameOffset);
  if (owner != null) {
    ((FuzzyXMLElement) owner).appendChild(element);
  }
  else {
    _roots.add(element);
  }
  // Add the attributes.
  for (AttrInfo attrInfo : info.getAttrs()) {
    element.appendChild(createFuzzyXMLAttribute(element, offset, attrInfo));
  }
  element.setSynthetic(synthetic);
  checkElement(element);
}
/**
 * Checks each attribute of an element. In well-formed mode the value is
 * checked for unescaped characters; otherwise, for elements that are not WO
 * tags, the attribute value is scanned for nested WO tags with the element
 * as their parent.
 *
 * @param element
 *          the element whose attributes are checked
 */
protected void checkElement(FuzzyXMLElement element) {
  for (FuzzyXMLAttribute attr : element.getAttributes()) {
    if (_wellFormedRequired) {
      checkAttributeValue(attr);
      continue;
    }
    if (WodHtmlUtils.isWOTag((FuzzyXMLElement) attr.getParentNode())) {
      continue;
    }
    // The owning element acts as the parent while the value is scanned.
    _stack.push(attr.getParentNode());
    _parse(attr.getValue(), element.getOffset() + attr.getValueDataOffset() + 1, true, true);
    FuzzyXMLNode top = _stack.pop();
    if (top != attr.getParentNode()) {
      // Something else is on top; put it back untouched.
      _stack.push(top);
    }
  }
}
/**
 * Processes a comment: attaches the comment node to the current parent or
 * the roots, then scans the comment text for WO tags with the comment as
 * their parent.
 */
private void handleComment(int offset, int end, String text) {
  closeAutocloseTags();
  FuzzyXMLNode owner = getParent();
  FuzzyXMLCommentImpl comment = new FuzzyXMLCommentImpl(owner, text, offset, end - offset);
  if (owner != null) {
    ((FuzzyXMLElement) owner).appendChild(comment);
  }
  else {
    _roots.add(comment);
  }
  _stack.push(comment);
  String scannable = text.replaceFirst("<[^>]+-->$", "");
  _parse(scannable, offset, true, true);
  FuzzyXMLNode top = _stack.pop();
  if (top != comment) {
    // Something else is on top; put it back untouched.
    _stack.push(top);
  }
}
/**
 * Processes a start tag found at the given range of the original source:
 * creates a script, style or plain element depending on the tag name and
 * opens it.
 */
private void handleStartTag(int offset, int end, boolean synthetic) {
  closeAutocloseTags();
  // MS: If you're in the middle of typing, offset + 1 to end - 1 can put
  // you in an invalid state (for instance, if you just type "<" that will
  // overlap.
  String rawTag = _originalSource.substring(offset, end);
  String withoutOpen = rawTag.startsWith("<") ? rawTag.substring(1) : rawTag;
  String tagContents = withoutOpen.endsWith(">") ? withoutOpen.substring(0, withoutOpen.length() - 1) : withoutOpen;
  TagInfo info = parseTagContents(tagContents);

  final int length = end - offset;
  FuzzyXMLNode owner = getParent();
  FuzzyXMLElement element;
  if (info.name.equalsIgnoreCase("script")) {
    element = new FuzzyXMLScriptImpl(owner, info.name, offset, length, info.nameOffset);
  }
  else if (info.name.equalsIgnoreCase("style")) {
    element = new FuzzyXMLStyleImpl(owner, info.name, offset, length, info.nameOffset);
  }
  else {
    element = new FuzzyXMLElementImpl(owner, info.name, offset, length, info.nameOffset);
  }
  handleStartTag(element, info, offset, end);
  element.setSynthetic(synthetic);
}
/**
 * Builds an attribute node from parsed attribute info. A "ns:name"
 * attribute name is split into namespace and local name. In well-formed
 * mode the value is decoded again; otherwise the value is used as parsed
 * and escaping is switched off when it contains a quote, {@code <},
 * {@code >} or {@code &}.
 *
 * @param element
 *          the element that owns the attribute
 * @param offset
 *          offset added to the attribute's offset within the tag
 * @param attrInfo
 *          the parsed attribute data
 * @return the new attribute node
 */
protected FuzzyXMLAttributeImpl createFuzzyXMLAttribute(FuzzyXMLElement element, int offset, AttrInfo attrInfo) {
  String namespace = null;
  String localName = attrInfo.name;
  int colonIndex = (localName == null) ? -1 : localName.indexOf(':');
  if (colonIndex != -1) {
    namespace = localName.substring(0, colonIndex);
    localName = localName.substring(colonIndex + 1);
  }

  String attrValue = _wellFormedRequired ? FuzzyXMLUtil.decode(attrInfo.value, false) : attrInfo.value;
  FuzzyXMLAttributeImpl attr = new FuzzyXMLAttributeImpl(element, namespace, localName, attrValue, attrInfo.rawValue, attrInfo.offset + offset, attrInfo.end - attrInfo.offset + 1, attrInfo.valueOffset);
  attr.setHasNestedTag(attrInfo.hasNestedTag);
  attr.setQuoteCharacter(attrInfo.quote);

  if (!_wellFormedRequired) {
    boolean containsMarkup = false;
    for (char special : new char[] { '"', '\'', '<', '>', '&' }) {
      if (attrInfo.value.indexOf(special) >= 0) {
        containsMarkup = true;
        break;
      }
    }
    if (containsMarkup) {
      attr.setEscape(false);
    }
  }
  return attr;
}
/**
 * Opens an already-created element: adds its attributes, pushes it on the
 * stack, records it as not yet closed and checks its attributes.
 */
private void handleStartTag(FuzzyXMLElement element, TagInfo info, int offset, int end) {
  // Add the attributes.
  for (AttrInfo attrInfo : info.getAttrs()) {
    element.appendChild(createFuzzyXMLAttribute(element, offset, attrInfo));
  }
  _stack.push(element);
  _nonCloseElements.add(element);
  checkElement(element);
}
/**
 * Gets the last element of the stack without removing it.
 *
 * @return the innermost open node, or null when the stack is empty
 */
private FuzzyXMLNode getParent() {
  return _stack.isEmpty() ? null : _stack.peek();
}
/**
 * Parses the inside of a tag into its name and attributes.
 * <p>
 * NOTE(review): nameOffset is only assigned when the contents contain
 * whitespace; otherwise it keeps its default of 0 even if the text had
 * leading whitespace -- confirm whether that is intended.
 */
private TagInfo parseTagContents(String text) {
  // Trim surrounding whitespace, remembering where the content started.
  Range trimmedRange = Range.trimmedRange(text);
  String contents = trimmedRange.trim(text);
  // Drop the trailing slash of a self-closing tag.
  if (contents.endsWith("/")) {
    contents = contents.substring(0, contents.length() - 1);
  }
  TagInfo info = new TagInfo();
  int spaceIndex = FuzzyXMLUtil.getSpaceIndex(contents);
  if (spaceIndex == -1) {
    // No whitespace: the whole text is the tag name.
    info.name = contents;
    return info;
  }
  // Everything up to the first whitespace is the tag name.
  info.name = contents.substring(0, spaceIndex).trim();
  info.nameOffset = trimmedRange.getOffset();
  parseAttributeContents(info, contents);
  return info;
}
/**
 * States of the character-by-character scanner in parseAttributeContents:
 * Start (inside the tag name), BeforeAttributeName (whitespace before a
 * name), InAttributeName, AfterAttributeName (after the '='),
 * InAttributeValue, and InNestedTag (inside a '<...>' within a value).
 */
private static enum AttributeParseState {
Start, BeforeAttributeName, InAttributeName, AfterAttributeName, InAttributeValue, InNestedTag,
}
/**
 * Parses the attribute portion of a tag and adds each attribute found to
 * {@code info}.
 * <p>
 * The text is scanned one character at a time with a small state machine.
 * Values may be quoted with ' or ", or unquoted (ended by whitespace or the
 * end of the text). A quoted value that is never closed is not added.
 *
 * @param info
 *          receives the parsed attributes
 * @param text
 *          the trimmed tag contents, beginning with the tag name
 */
private void parseAttributeContents(TagInfo info, String text) {
AttributeParseState state = AttributeParseState.Start;
// Accumulates the attribute name or value currently being read.
StringBuffer tokenBuffer = new StringBuffer();
String name = null;
// The active quote character, or 0 for an unquoted value.
char quoteCharacter = 0;
// Index where the current attribute name starts (-1 when none is pending).
int start = -1;
// Index of the first non-whitespace character after '='.
int valueOffset = -1;
// True right after a single backslash inside a value.
boolean escape = false;
boolean hasNestedTag = false;
for (int i = 0; i < text.length(); i++) {
char c = text.charAt(i);
// The first whitespace ends the tag name.
if (state == AttributeParseState.Start && FuzzyXMLUtil.isWhitespace(c)) {
state = AttributeParseState.BeforeAttributeName;
}
else if (state == AttributeParseState.BeforeAttributeName && !FuzzyXMLUtil.isWhitespace(c)) {
if (start == -1) {
start = i;
}
state = AttributeParseState.InAttributeName;
tokenBuffer.append(c);
}
else if (state == AttributeParseState.InAttributeName) {
// '=' ends the name; every other character (including whitespace) is
// collected and trimmed off when the name is taken.
if (c == '=') {
state = AttributeParseState.AfterAttributeName;
name = tokenBuffer.toString().trim();
tokenBuffer.setLength(0);
valueOffset = -1;
}
else {
tokenBuffer.append(c);
}
}
else if (state == AttributeParseState.AfterAttributeName && !FuzzyXMLUtil.isWhitespace(c)) {
if (valueOffset == -1) {
valueOffset = i;
}
if (c == '\'' || c == '\"') {
quoteCharacter = c;
}
else {
// Unquoted value: this character is already part of it.
quoteCharacter = 0;
tokenBuffer.append(c);
}
state = AttributeParseState.InAttributeValue;
}
else if (state == AttributeParseState.InAttributeValue) {
// A backslash-escaped quote is kept as a literal quote character.
if (c == quoteCharacter && escape == true) {
tokenBuffer.append(c);
escape = false;
}
else if (c == quoteCharacter || (quoteCharacter == 0 && FuzzyXMLUtil.isWhitespace(c))) {
// add an attribute
AttrInfo attr = new AttrInfo();
attr.name = FuzzyXMLUtil.decode(name, _isHTML);
attr.rawValue = tokenBuffer.toString();
attr.value = FuzzyXMLUtil.decode(attr.rawValue, _isHTML);
attr.valueOffset = valueOffset;
attr.offset = start;
attr.end = i + 1;
attr.quote = quoteCharacter;
attr.hasNestedTag = hasNestedTag;
info.addAttr(attr);
// reset
tokenBuffer.setLength(0);
state = AttributeParseState.BeforeAttributeName;
start = -1;
hasNestedTag = false;
}
else if (c == '\\') {
// Two backslashes in a row collapse into one literal backslash.
if (escape == true) {
tokenBuffer.append(c);
escape = false;
}
else {
// MS: I took out escaping .. This is potentially a really sketchy
// thing to do, but it
// was breaking attributes like numberformat = "\$#,##0.00"
// Q: moved append to following 'else' block
escape = true;
}
}
else if (c == '<') {
hasNestedTag = true;
state = AttributeParseState.InNestedTag;
tokenBuffer.append(c);
}
else {
// A backslash before an ordinary character is kept verbatim.
if (escape) {
tokenBuffer.append('\\');
escape = false;
}
tokenBuffer.append(c);
}
}
else if (state == AttributeParseState.InNestedTag) {
// Copy the nested tag verbatim until its closing '>'.
tokenBuffer.append(c);
if (c == '>') {
state = AttributeParseState.InAttributeValue;
}
}
}
// An unquoted value that runs to the end of the text is still an attribute.
if ((state == AttributeParseState.InAttributeValue || state == AttributeParseState.InNestedTag) && quoteCharacter == 0) {
AttrInfo attr = new AttrInfo();
attr.name = FuzzyXMLUtil.decode(name, _isHTML);
attr.rawValue = tokenBuffer.toString();
attr.value = FuzzyXMLUtil.decode(attr.rawValue, _isHTML);
attr.valueOffset = valueOffset;
attr.offset = start;
attr.end = text.length();
attr.quote = quoteCharacter;
attr.hasNestedTag = hasNestedTag;
info.addAttr(attr);
}
// An unterminated quoted value is intentionally left unreported here.
if (state == AttributeParseState.InAttributeValue && quoteCharacter != 0) {
// System.out.println("FuzzyXMLParser.parseAttributeContents: " +
// info.name);
}
}
/** Parsed tag: its name, the name's offset and its attributes. */
private class TagInfo {
  private String name;
  private int nameOffset;
  private ArrayList<AttrInfo> attrs = new ArrayList<AttrInfo>();

  /** Adds the attribute unless one with the same name is already present. */
  public void addAttr(AttrInfo attr) {
    for (AttrInfo existing : attrs) {
      if (existing.name.equals(attr.name)) {
        return;
      }
    }
    attrs.add(attr);
  }

  /** Returns the attributes collected so far as a new array. */
  public AttrInfo[] getAttrs() {
    AttrInfo[] snapshot = new AttrInfo[attrs.size()];
    return attrs.toArray(snapshot);
  }
}
/** Parsed attribute data produced by parseAttributeContents. */
private class AttrInfo {
// Decoded attribute name (may still contain a "ns:" prefix).
private String name;
// Decoded attribute value.
private String value;
// Attribute value exactly as collected from the tag text.
private String rawValue;
// Index within the tag contents where the attribute name starts.
private int offset;
// Index within the tag contents of the first character after '='.
private int valueOffset;
// Index within the tag contents just past the attribute.
private int end;
// Quote character used around the value, or 0 when unquoted.
private char quote;
// True when the value contained a '<' that started a nested tag.
private boolean hasNestedTag;
}
/**
 * An (offset, length) pair describing the part of a string that remains
 * after leading and trailing characters {@code <= ' '} are trimmed.
 */
public static class Range {
  private int _offset;
  private int _length;

  public Range() {
  }

  /** Returns the index of the first kept character. */
  public int getOffset() {
    return _offset;
  }

  /** Returns the number of kept characters. */
  public int getLength() {
    return _length;
  }

  /** Returns the substring of {@code str} covered by this range. */
  public String trim(String str) {
    int endIndex = _offset + _length;
    return str.substring(_offset, endIndex);
  }

  /**
   * Computes the range of {@code str} that excludes leading and trailing
   * characters {@code <= ' '}. For an empty or all-whitespace string the
   * length is 0.
   */
  public static Range trimmedRange(String str) {
    final int total = str.length();
    int head = 0;
    while (head < total && str.charAt(head) <= ' ') {
      head++;
    }
    int tail = total - 1;
    while (tail > head && str.charAt(tail) <= ' ') {
      tail--;
    }
    Range range = new Range();
    range._offset = head;
    range._length = tail - head + 1;
    return range;
  }
}
}