Package net.fp.rp.search.back.extractor.xml

Source Code of net.fp.rp.search.back.extractor.xml.Translator

/*
* Copyright (C) 2004 Paul Browne, http://www.firstpartners.net,
* built with the help of Fast-Soft (fastsoftdev@yahoo.com)
*
* released under terms of the GPL license
* http://www.opensource.org/licenses/gpl-license.php
*
* This product includes software developed by the
* Apache Software Foundation (http://www.apache.org)."
*
* This product includes software developed by the
* Spring Framework Project (http://www.springframework.org)."
*
*/
package net.fp.rp.search.back.extractor.xml;


import net.fp.rp.search.back.struct.NodeStruct;
import net.fp.rp.search.common.util.Util;

import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;


/**
* Translate from xml structure (w3c.dom) into internal Node-structure
*
* @author Firstpartners.net
* @version 1.1
* Copyright @link www.firstpartners.net/red
*/
public class Translator {
    /**
     * Generate a node struct object using a org.w3c.dom.Node object
     *
     * @param node XML-object
     *
     * @return NodeStruct object
     */
    public static NodeStruct translate(Node node) {
        System.out.println("Translate the node" + node.getNodeName());
        if (node == null) {
            return null;
        }

        NodeStruct actual = null;
        int type = node.getNodeType();

        switch (type) {
        // print document
        case Node.DOCUMENT_NODE: {
            return translate(((Document) node).getDocumentElement() );
        }

        // print element with attributes
        case Node.ELEMENT_NODE: {
            System.out.println("Process attributes for node" + node.getNodeName());
           
            actual = new NodeStruct();

            StringBuffer buf = new StringBuffer();
            NamedNodeMap attrs = node.getAttributes();
            int lenAttrs = (attrs != null) ? attrs.getLength() : 0;

            for (int i = 0; i < lenAttrs; i++) {
                String strValue = attrs.item(i).getNodeValue();

                if ((strValue.indexOf('<') != -1) ||
                        (strValue.indexOf('>') != -1) ||
                        (strValue.indexOf('&') != -1) ||
                        (strValue.indexOf('"') != -1) ||
                        (strValue.indexOf('\'') != -1)) {
                    actual.addTuple(node.getNodeName(), encode(strValue));
                } else {
                    actual.addTuple(node.getNodeName(), strValue);
                }
            }
            System.out.println("Process the childs");
           
            NodeList children = node.getChildNodes();
            int len = children.getLength();

            for (int i = 0; i < len; i++) {
                System.out.println("Process the childs"  + children.item(i).getNodeName());
               
                NodeStruct childNode = translate(children.item(i) );

                if (childNode != null) {
                    System.out.println("Append the child "+children.item(i).getNodeName());
                    actual.addChild(childNode);
                }
            }
            break;
        }

        // handle entity reference nodes
        case Node.ENTITY_REFERENCE_NODE: {
            actual = new NodeStruct();
            actual.addTuple(node.getNodeName(), "");

            break;
        }

        // print cdata sections
        case Node.CDATA_SECTION_NODE: {
            //must to encode this values
            actual = new NodeStruct();
            actual.addTuple("GENERIC", encode(node.getNodeValue()));
            break;
        }

        // print text
        case Node.TEXT_NODE: {
            System.out.println("Process the text");
            String strValue = node.getNodeValue();
            System.out.println("Process the text value "  + node.getNodeName() +" /" + strValue );

            if ( !Util.isEmpty( strValue )) {
                actual = new NodeStruct();
 
              if ((strValue.indexOf('<') != -1) || (strValue.indexOf('>') != -1) ||
                      (strValue.indexOf('&') != -1) ||
                      (strValue.indexOf('"') != -1) ||
                      (strValue.indexOf('\'') != -1)) {
                  actual.addTuple(node.getNodeName(), encode(strValue));
              } else {
                  actual.addTuple(node.getNodeName(), strValue);
              }
            }

            break;
        }

        // print processing instruction
        case Node.PROCESSING_INSTRUCTION_NODE: {
            actual = new NodeStruct();
            String data = node.getNodeValue();
            if ((data != null) && (data.length() > 0)) {
                //data must to be encoded
                actual.addTuple(node.getNodeName(), encode(data));
            }
            break;
        }
        }

        return actual;
    }

    /**
     * Encode the set of special XML characters: < > " ' ?
     *
     * @param strText <code>String</code> to be encoded
     *
     * @return encoded <code>String</code>
     */
    public static String encode(String strText) {
        //bug 1 . - null string
        if (strText == null) {
            return "";
        }

        int iOrgLen = strText.length();
        StringBuffer sb = new StringBuffer(iOrgLen);

        for (int i = 0; i < iOrgLen; i++) {
            char c = strText.charAt(i);

            // CAUTION: Encoding is done using numeric character references, since
            // encoding as &amp; was not resolved properly by the sun parser
            if (c == '&') {
                sb.append("&#38;");
            } else if (c == '<') {
                sb.append("&#60;");
            } else if (c == '>') {
                sb.append("&#62;");
            } else if (c == '\'') {
                sb.append("&#39;");
            } else if (c == '"') {
                sb.append("&#34;");
            } else {
                sb.append(c);
            }
        }

        return sb.toString();
    }
}
TOP

Related Classes of net.fp.rp.search.back.extractor.xml.Translator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.