/*
* Copyright (C) 2004 Paul Browne, http://www.firstpartners.net,
* built with the help of Fast-Soft (fastsoftdev@yahoo.com)
*
* released under terms of the GPL license
* http://www.opensource.org/licenses/gpl-license.php
*
* This product includes software developed by the
* Apache Software Foundation (http://www.apache.org)."
*
* This product includes software developed by the
* Spring Framework Project (http://www.springframework.org)."
*
*/
package net.fp.rp.search.back.extractor.xml;
import net.fp.rp.search.back.struct.NodeStruct;
import net.fp.rp.search.common.util.Util;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
* Translate from an XML structure (w3c.dom) into the internal Node-structure
*
* @author Firstpartners.net
* @version 1.1
* Copyright @link www.firstpartners.net/red
*/
public class Translator {
/**
* Generate a node struct object using a org.w3c.dom.Node object
*
* @param node XML-object
*
* @return NodeStruct object
*/
public static NodeStruct translate(Node node) {
System.out.println("Translate the node" + node.getNodeName());
if (node == null) {
return null;
}
NodeStruct actual = null;
int type = node.getNodeType();
switch (type) {
// print document
case Node.DOCUMENT_NODE: {
return translate(((Document) node).getDocumentElement() );
}
// print element with attributes
case Node.ELEMENT_NODE: {
System.out.println("Process attributes for node" + node.getNodeName());
actual = new NodeStruct();
StringBuffer buf = new StringBuffer();
NamedNodeMap attrs = node.getAttributes();
int lenAttrs = (attrs != null) ? attrs.getLength() : 0;
for (int i = 0; i < lenAttrs; i++) {
String strValue = attrs.item(i).getNodeValue();
if ((strValue.indexOf('<') != -1) ||
(strValue.indexOf('>') != -1) ||
(strValue.indexOf('&') != -1) ||
(strValue.indexOf('"') != -1) ||
(strValue.indexOf('\'') != -1)) {
actual.addTuple(node.getNodeName(), encode(strValue));
} else {
actual.addTuple(node.getNodeName(), strValue);
}
}
System.out.println("Process the childs");
NodeList children = node.getChildNodes();
int len = children.getLength();
for (int i = 0; i < len; i++) {
System.out.println("Process the childs" + children.item(i).getNodeName());
NodeStruct childNode = translate(children.item(i) );
if (childNode != null) {
System.out.println("Append the child "+children.item(i).getNodeName());
actual.addChild(childNode);
}
}
break;
}
// handle entity reference nodes
case Node.ENTITY_REFERENCE_NODE: {
actual = new NodeStruct();
actual.addTuple(node.getNodeName(), "");
break;
}
// print cdata sections
case Node.CDATA_SECTION_NODE: {
//must to encode this values
actual = new NodeStruct();
actual.addTuple("GENERIC", encode(node.getNodeValue()));
break;
}
// print text
case Node.TEXT_NODE: {
System.out.println("Process the text");
String strValue = node.getNodeValue();
System.out.println("Process the text value " + node.getNodeName() +" /" + strValue );
if ( !Util.isEmpty( strValue )) {
actual = new NodeStruct();
if ((strValue.indexOf('<') != -1) || (strValue.indexOf('>') != -1) ||
(strValue.indexOf('&') != -1) ||
(strValue.indexOf('"') != -1) ||
(strValue.indexOf('\'') != -1)) {
actual.addTuple(node.getNodeName(), encode(strValue));
} else {
actual.addTuple(node.getNodeName(), strValue);
}
}
break;
}
// print processing instruction
case Node.PROCESSING_INSTRUCTION_NODE: {
actual = new NodeStruct();
String data = node.getNodeValue();
if ((data != null) && (data.length() > 0)) {
//data must to be encoded
actual.addTuple(node.getNodeName(), encode(data));
}
break;
}
}
return actual;
}
/**
* Encode the set of special XML characters: < > " ' ?
*
* @param strText <code>String</code> to be encoded
*
* @return encoded <code>String</code>
*/
public static String encode(String strText) {
//bug 1 . - null string
if (strText == null) {
return "";
}
int iOrgLen = strText.length();
StringBuffer sb = new StringBuffer(iOrgLen);
for (int i = 0; i < iOrgLen; i++) {
char c = strText.charAt(i);
// CAUTION: Encoding is done using numeric character references, since
// encoding as & was not resolved properly by the sun parser
if (c == '&') {
sb.append("&");
} else if (c == '<') {
sb.append("<");
} else if (c == '>') {
sb.append(">");
} else if (c == '\'') {
sb.append("'");
} else if (c == '"') {
sb.append(""");
} else {
sb.append(c);
}
}
return sb.toString();
}
}