/*
* Copyright 1999-2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* CVS $Id: DOMCompressor.java,v 1.11 2004/02/08 03:11:56 vgritsenko Exp $
*/
package org.apache.xindice.xml.dom;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.xindice.util.XindiceException;
import org.apache.xindice.xml.Signatures;
import org.apache.xindice.xml.SymbolTable;
import org.apache.xindice.xml.XMLCompressedOutput;
import org.w3c.dom.Attr;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
/**
* DOMCompressor is an OutputStream extension that provides functions for
* writing DOM types to a Xindice Compressed XML Stream.
*
* @version CVS $Revision: 1.11 $, $Date: 2004/02/08 03:11:56 $
*/
public final class DOMCompressor extends XMLCompressedOutput {
private static final Log log = LogFactory.getLog(DOMCompressor.class);
public DOMCompressor(OutputStream os, SymbolTable st) {
super(os, st);
}
/**
* writeNode writes a Node to the compressed output stream. This method
* is recursive and will write all children of the specific Node.
*
* @param node The Node to write
* @throws IOException If the write failed
*/
public void writeNode(Node node) throws IOException {
if (node instanceof NodeImpl) {
NodeImpl impl = (NodeImpl) node;
if (!impl.dirty && impl.data != null) {
write(impl.data, impl.pos, impl.len);
flush();
return;
}
}
// Have to do it manually
switch (node.getNodeType()) {
case Node.ELEMENT_NODE:
{
String nsURI = node.getNamespaceURI();
short symbolID;
if (nsURI != null) {
symbolID = st.getSymbol(node.getNodeName(), nsURI, true);
} else {
symbolID = st.getSymbol(node.getNodeName(), true);
}
byte signature = (byte) (Signatures.Elem << 0x6);
int attrLen = node.getAttributes().getLength();
int attrSize = getSizeType(attrLen);
if (attrLen > 0) {
signature |= (byte) (Signatures.True << 0x5);
}
if (node.hasChildNodes()) {
signature |= (byte) (Signatures.True << 0x4);
}
signature |= (byte) (attrSize);
byte[] children = buildChildren(node);
int valLen = children.length;
int sizeType = getSizeType(valLen + 11);
signature |= (byte) (sizeType << 0x2);
writeByte(signature);
valLen += (getSizeSize(sizeType) + 3);
writeSize(sizeType, valLen);
writeShort(symbolID);
write(children);
break;
}
case Node.ATTRIBUTE_NODE:
{
Attr attr = (Attr) node;
String nsURI = attr.getNamespaceURI();
short symbolID;
if (nsURI != null) {
symbolID = st.getSymbol(attr.getName(), nsURI, true);
} else {
symbolID = st.getSymbol(attr.getName(), true);
}
writeShort(symbolID);
byte[] b = attr.getValue().getBytes("UTF-8");
writeShort((short) b.length);
write(b);
break;
}
case Node.TEXT_NODE:
{
byte[] b = node.getNodeValue().getBytes("UTF-8");
int valLen = b.length;
int sizeType = getSizeType(valLen + 5);
byte signature = (byte) (Signatures.Char << 0x6);
signature |= (byte) (sizeType << 0x2);
writeByte(signature);
valLen += (getSizeSize(sizeType) + 1);
writeSize(sizeType, valLen);
write(b);
break;
}
case Node.CDATA_SECTION_NODE:
{
byte[] b = node.getNodeValue().getBytes("UTF-8");
int valLen = b.length;
byte signature = (byte) (Signatures.Decl << 0x6);
signature |= (byte) (Signatures.DeclCData << 0x2);
writeByte(signature);
valLen += 5;
writeInt(valLen);
write(b);
break;
}
case Node.ENTITY_REFERENCE_NODE:
{
String value = node.getNodeName();
byte signature = (byte) (Signatures.Char << 0x6);
signature |= (byte) (Signatures.CharEntity << 0x4);
short symbol = 0;
int encoding = 0;
if (value.equals("&")) {
signature |= (byte) (Signatures.EntAmp);
} else if (value.equals("<")) {
signature |= (byte) (Signatures.EntLt);
} else if (value.equals(">")) {
signature |= (byte) (Signatures.EntGt);
} else if (value.equals(""")) {
signature |= (byte) (Signatures.EntQuot);
} else if (value.equals("'")) {
signature |= (byte) (Signatures.EntApos);
} else if (value.startsWith("&#x")) {
encoding = 1;
signature |= (byte) (Signatures.EntUnicode);
// Convert the Unicode to a short
} else {
encoding = 2;
symbol = st.getSymbol(value, true);
signature |= (byte) (Signatures.EntDefined);
}
writeByte(signature);
if (encoding > 0) {
writeShort(symbol);
}
break;
}
case Node.ENTITY_NODE:
{
break;
}
case Node.PROCESSING_INSTRUCTION_NODE:
{
String value = node.getNodeName() + " " + node.getNodeValue();
byte[] b = value.getBytes("UTF-8");
int valLen = b.length;
byte signature = (byte) (Signatures.Proc << 0x6);
writeByte(signature);
valLen += 5;
writeInt(valLen);
write(b);
break;
}
case Node.COMMENT_NODE:
{
byte[] b = node.getNodeValue().getBytes("UTF-8");
int valLen = b.length;
byte signature = (byte) (Signatures.Decl << 0x6);
signature |= (byte) (Signatures.DeclComment << 0x2);
writeByte(signature);
valLen += 5;
writeInt(valLen);
write(b);
break;
}
case Node.DOCUMENT_NODE:
{
byte[] children = buildChildren(node);
writeInt(children.length);
write(children);
break;
}
case Node.DOCUMENT_TYPE_NODE:
{
// Errr?
break;
}
case Node.DOCUMENT_FRAGMENT_NODE:
{
byte[] children = buildChildren(node);
writeInt(children.length);
write(children);
break;
}
case Node.NOTATION_NODE:
{
// byte signature = (byte)(Signatures.Decl << 0x6);
break;
}
default:
if (log.isWarnEnabled()) {
log.warn("invalid node type : " + node.getNodeType());
}
}
flush();
}
/**
* buildChildren builds a byte array containing the child definitions
* for the specified Node. Because Xindice's compressed system is a
* depth-first system that requires the length of nested data sets in
* order to allow quick traversal, this process must build the byte
* arrays in memory before including them in their parents.
* <br>
* If a Node has already been graphed and has not been modified, it's
* byte array portion is simply copied into the stream without
* requiring a regraph. This allows for quickly streaming out a modified
* DOM tree, but will take some time in streaming a brand new DOM tree.
*
* @param node The Node whose children to build
* @return The child byte array
* @throws IOException If the build failed
*/
private byte[] buildChildren(Node node) throws IOException {
ByteArrayOutputStream bos = new ByteArrayOutputStream();
BufferedOutputStream buf = new BufferedOutputStream(bos, 4096);
DOMCompressor out = new DOMCompressor(buf, st);
if (node.getNodeType() == Node.ELEMENT_NODE) {
NamedNodeMap attrs = node.getAttributes();
int len = attrs.getLength();
int sizeType = getSizeType(len);
out.writeSize(sizeType, len);
for (int i = 0; i < len; i++) {
out.writeNode(attrs.item(i));
}
}
NodeList children = node.getChildNodes();
int len = children.getLength();
for (int i = 0; i < len; i++) {
out.writeNode(children.item(i));
}
out.flush();
return bos.toByteArray();
}
/**
* Compress is a convenience method that compresses a Node into a byte
* array with a single call.
*
* @param node The Node to compress
* @param symbols The Symbol Table to use
* @return The resulting byte array
* @throws XindiceException if an Exception occurs
*/
public static byte[] Compress(Node node, SymbolTable symbols) throws XindiceException {
try {
ByteArrayOutputStream bos = new ByteArrayOutputStream();
BufferedOutputStream buf = new BufferedOutputStream(bos, 4096);
DOMCompressor xco = new DOMCompressor(buf, symbols);
node.normalize();
xco.writeNode(node);
xco.flush();
return bos.toByteArray();
} catch (Exception e) {
throw new XindiceException("XML Compression Error", e);
}
}
}