/*************************************************************************
*
* $RCSfile: DocumentFragmentServerImpl.java,v $
*
* $Revision: 1.1 $
*
* last change: $Author: abi $ $Date: 2000/11/30 18:03:51 $
*
* The Contents of this file are made available subject to the terms of
* either of the following licenses
*
* - GNU Lesser General Public License Version 2.1
* - Sun Industry Standards Source License Version 1.1
*
* Sun Microsystems Inc., October, 2000
*
* GNU Lesser General Public License Version 2.1
* =============================================
* Copyright 2000 by Sun Microsystems, Inc.
* 901 San Antonio Road, Palo Alto, CA 94303, USA
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
*
* Sun Industry Standards Source License Version 1.1
* =================================================
* The contents of this file are subject to the Sun Industry Standards
* Source License Version 1.1 (the "License"); You may not use this file
* except in compliance with the License. You may obtain a copy of the
* License at http://www.openoffice.org/license.html.
*
* Software provided under this License is provided on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING,
* WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
* MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
* See the License for the specific provisions governing your rights and
* obligations concerning the Software.
*
* The Initial Developer of the Original Code is: Sun Microsystems, Inc.
*
* Copyright: 2000 by Sun Microsystems, Inc.
*
* All Rights Reserved.
*
* Contributor(s): _______________________________________
*
*
************************************************************************/
package com.sun.xmlsearch.xml.qe;
import java.io.*;
import java.util.*;
import java.net.URL;
import java.net.MalformedURLException;
import java.text.MessageFormat;
import com.jclark.xsl.om.*;
import com.jclark.xsl.tr.Result;
import com.jclark.xsl.tr.OutputMethod;
import com.jclark.xsl.tr.LoadContext;
import com.jclark.xsl.dom.Transform;
import com.jclark.xsl.dom.TransformEngine;
import com.jclark.xsl.dom.TransformException;
import com.jclark.xsl.dom.XSLTransformEngine;
import com.sun.xmlsearch.util.*;
import com.sun.xmlsearch.tree.*;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import com.sun.xml.tree.XmlDocument;
import javax.swing.tree.TreeNode;
public final class DocumentFragmentServerImpl
implements DocumentFragmentServer, Names {
/**
 * Transform engine whose document loading is routed through the enclosing
 * server's tree builder (see parseTargetDocument(URL)).
 */
final class MyXslEngine extends XSLTransformEngine {
    /**
     * Loads the document found at <code>url</code>.
     * The <code>documentIndex</code>, <code>context</code> and
     * <code>nameTable</code> arguments are not consulted by this
     * implementation.
     *
     * @param url location of the document to load
     * @return the node returned by parseTargetDocument(url)
     * @throws XSLException wrapping whatever exception parsing raised
     */
    public Node load(URL url,
                     int documentIndex,
                     LoadContext context,
                     NameTable nameTable) throws XSLException {
        System.out.println("loading " + url.toString());
        final Node root;
        try {
            root = parseTargetDocument(url);
        }
        catch (Exception failure) {
            // callers of load() only understand XSLException
            throw new XSLException(failure);
        }
        return root;
    }
}
// Document-type catalog; created by the constructor and queried for the
// TOC transform, the to-HTML transform and the model URL of a doc type.
private DocTypeCatalog _catalog;
// Local docs mount point. The initializer is only a default: the
// constructor overwrites it with args[0].
private String Dir = System.getProperty("XMLSEARCH")+"/samples/docs/";
// HTTP address of the same docs. Overwritten with args[1] by the
// constructor. File names are appended directly (e.g. Http + "default.xsl"),
// so the value is expected to end with '/'.
private String Http = "http://localhost:8084/";
// Parses one xPath step of the form name[n] into { name, n }.
// NOTE(review): MessageFormat instances are not thread-safe and this one is
// static, i.e. shared by every instance of this class.
private static final MessageFormat StepFormat =
new MessageFormat("{0}[{1,number,integer}]");
// Tokenizer used by highlightTree() to locate tokens inside text nodes.
private Tokenizer _tokenizer = new SimpleTokenizer();
// Tree builder and tree result; both are created in initXmlProcessor() on
// the transform engine's name table.
private TreeBuilder _treeBuilder;
private TreeResult _treeResult;
// Receives the output of the to-HTML transform in getDocumentFragment().
private HtmlContent _html;
// Maps document URL -> DocumentData.
// presumably 90000 is a capacity/size limit -- TODO confirm against Cache
private Cache _documentCache = new Cache(90000);
// _transformCache can also be a cache if makes sense
// Maps stylesheet URL -> Transform (see getTransform).
private Hashtable _transformCache = new Hashtable();
private MyXslEngine _transformEngine;
// Transform built from Http + "default.xsl" in initXmlProcessor().
private Transform _defaultTransform;
// state of the modified tree
// text node -> IntegerArray of token numbers to highlight in that node
private Hashtable _tokenNumbers = new Hashtable();
// substitute elements currently spliced into document trees; revertTree()
// hands each of them to the tree builder to undo the substitution
private Vector _substituted = new Vector();
// Element names and shared empty marker elements used for highlighting;
// all five are initialized in initXmlProcessor().
private Name _StartHighlight_Name;
private Name _EndHighlight_Name;
private Name _HighlightedText_Name;
private Node _StartHighlight_Node;
private Node _EndHighlight_Node;
/**
 * Creates a server, loads the default stylesheet and reads the
 * document-type catalog.
 *
 * @param args <code>args[0]</code> is the local docs mount point,
 *             <code>args[1]</code> the http address for the same docs.
 *             File names are appended to <code>args[1]</code> directly, so
 *             it is expected to end with '/'.
 * @throws IllegalArgumentException if fewer than two arguments are given
 * @throws Exception if the default stylesheet or the catalog cannot be
 *                   loaded
 */
public DocumentFragmentServerImpl(String[] args) throws Exception {
    // Fail with a readable message rather than a bare
    // ArrayIndexOutOfBounds/NullPointerException.
    if (args == null || args.length < 2)
        throw new IllegalArgumentException(
            "expected 2 arguments: <local docs mount point> <http address>");
    Dir = args[0];  // local docs mount point
    Http = args[1]; // http address for the same
    initXmlProcessor();
    _html = new HtmlContent(_treeBuilder);
    // The catalog is served from the same base address as the stylesheets
    // (default.xsl, identity.xsl); with the default base address this is
    // the previously hard-coded http://localhost:8084/DocTypeCatalog.cfg.
    URL catalog = new URL(Http + "DocTypeCatalog.cfg");
    _catalog = new DocTypeCatalog(catalog);
}
/**
 * Sets up the XSL machinery: creates the transform engine, builds the
 * default transform from <code>Http + "default.xsl"</code>, creates the
 * tree builder and tree result on the engine's name table, and prepares
 * the element names and marker nodes used for highlighting.
 *
 * @throws Exception if the stylesheet cannot be fetched or parsed, or a
 *                   name cannot be created
 */
private final void initXmlProcessor() throws Exception {
    _transformEngine = new MyXslEngine();
    URL stylesheetUrl = new URL(Http + "default.xsl");
    InputStream stylesheetStream = stylesheetUrl.openStream();
    try {
        XmlDocument sheet = XmlDocument.createXmlDocument(stylesheetStream,
                                                          false);
        _defaultTransform = _transformEngine.createTransform(sheet);
    }
    finally {
        // the stream (and its connection) was previously never released
        stylesheetStream.close();
    }
    _treeBuilder = new TreeBuilder(_transformEngine.getNameTable());
    _treeResult = new TreeResult(_transformEngine.getNameTable());
    // getElementName() goes through _treeBuilder, so it must exist by now
    _StartHighlight_Name = getElementName("StartHighlight");
    _EndHighlight_Name = getElementName("EndHighlight");
    _HighlightedText_Name = getElementName("HighlightedText");
    _StartHighlight_Node =
        _treeBuilder.makeEmptyElement(_StartHighlight_Name);
    _EndHighlight_Node = _treeBuilder.makeEmptyElement(_EndHighlight_Name);
}
/**
 * Walks a '/'-separated path, one step at a time, starting at
 * <code>current</code>.
 *
 * @param current node to start from
 * @param xPath   steps separated by '/'
 * @return the node reached after the last step; <code>current</code>
 *         itself when the path contains no steps
 * @throws Exception if a step cannot be taken
 */
public Node followXPath(Node current, String xPath) throws Exception {
    Node position = current;
    for (StringTokenizer remaining = new StringTokenizer(xPath, "/");
         remaining.hasMoreTokens(); ) {
        position = xPathStep(position, remaining.nextToken());
    }
    return position;
}
/**
 * Asks the tree builder for the element <code>Name</code> that belongs to
 * the given string.
 *
 * @param elementName element name as a string
 * @return the Name supplied by the tree builder
 * @throws XSLException if the tree builder rejects the name
 */
public final Name getElementName(String elementName) throws XSLException {
    final Name resolved = _treeBuilder.getElementName(elementName);
    return resolved;
}
/**
 * Asks the tree builder for the attribute <code>Name</code> that belongs
 * to the given string.
 *
 * @param elementName the attribute name as a string (the parameter name
 *                    is historical)
 * @return the Name supplied by the tree builder
 * @throws XSLException if the tree builder rejects the name
 */
public final Name getAttributeName(String elementName) throws XSLException {
    final Name resolved = _treeBuilder.getAttributeName(elementName);
    return resolved;
}
/**
 * Like followXPath, but keeps every node visited on the way.
 *
 * @param current node to start from
 * @param xPath   steps separated by '/'
 * @return an array holding <code>current</code> at index 0 followed by the
 *         node reached after each successive step
 * @throws Exception if a step cannot be taken
 */
public Node[] expandXPath(Node current, String xPath) throws Exception {
    final StringTokenizer remaining = new StringTokenizer(xPath, "/");
    // one slot for the start node plus one per step
    final Node[] visited = new Node[remaining.countTokens() + 1];
    visited[0] = current;
    Node position = current;
    for (int slot = 1; remaining.hasMoreTokens(); slot++) {
        position = xPathStep(position, remaining.nextToken());
        visited[slot] = position;
    }
    return visited;
}
/**
 * Takes a single path step from <code>start</code>.
 * <ul>
 * <li><code>doc</code> climbs to the root of the tree containing
 *     <code>start</code>;</li>
 * <li><code>text()[n]</code> selects the n-th text child (1-based);</li>
 * <li><code>name[n]</code> selects the n-th element child whose name is
 *     <code>name</code> (1-based).</li>
 * </ul>
 *
 * @param start node the step is taken from
 * @param step  one step, without '/' separators
 * @return the node the step leads to
 * @throws Exception if the step is malformed or no matching child exists
 */
private Node xPathStep(Node start, String step) throws Exception {
    // !!! optimize it; maybe use a Hashtable mapping steps to actions
    if (step.equals("doc")) {
        // find root
        Node root = start;
        while (root.getParent() != null)
            root = root.getParent();
        return root;
    }
    // form: type[n-of-type]
    final Object[] parts;
    // MessageFormat is not thread-safe and StepFormat is static, i.e.
    // shared by all instances and reachable through the unsynchronized
    // public followXPath/expandXPath; serialize access to it.
    synchronized (StepFormat) {
        parts = StepFormat.parse(step);
    }
    final String name = (String)parts[0];
    // The number format yields a Long normally but a Double when the value
    // does not fit into a long; go through Number so that such input ends
    // in the "mis-step" exception below instead of a ClassCastException.
    int n = ((Number)parts[1]).intValue();
    SafeNodeIterator children = start.getChildren();
    Node child;
    if (name.equals("text()")) {
        // find 'n'th text node child
        while ((child = children.next()) != null)
            if (child.getType() == Node.TEXT && (--n == 0))
                return child; // no more steps expected
    }
    else {
        // get the interned 'Name'; compared by identity below
        NamespacePrefixMap npm = start.getNamespacePrefixMap();
        Name elementName = npm.expandElementTypeName(name, start);
        // find 'n'th child with tagName 'name'
        while ((child = children.next()) != null)
            if (child.getType() == Node.ELEMENT &&
                child.getName() == elementName && (--n == 0))
                return child;
    }
    // if not returned above
    throw new Exception("mis-step in xPath: " + step + " " + start.getName());
}
/**
 * Obtains the root node for the document at <code>docUrl</code> from the
 * tree builder.
 *
 * @param docUrl location of the document
 * @return the root returned by the tree builder
 * @throws Exception if the tree builder fails
 */
private Node parseTargetDocument(URL docUrl) throws Exception {
    final Node root = _treeBuilder.getRoot(docUrl);
    return root;
}
/**
 * Obtains the root node for the document read from <code>source</code>
 * from the tree builder.
 *
 * @param source SAX input source of the document
 * @return the root returned by the tree builder
 * @throws Exception if the tree builder fails
 */
private Node parseTargetDocument(InputSource source) throws Exception {
    final Node root = _treeBuilder.getRoot(source);
    return root;
}
/**
 * Obtains the root node for the input read from <code>in</code> from the
 * tree builder. Performs the same call as
 * parseTargetDocument(InputSource).
 *
 * @param in SAX input source
 * @return the root returned by the tree builder
 * @throws Exception if the tree builder fails
 */
private Node parseInputSource(InputSource in) throws Exception {
    final Node root = _treeBuilder.getRoot(in);
    return root;
}
/**
 * Everything the server keeps per document: the parsed document tree, the
 * lazily created table of contents (TOC) tree, and the mapping from
 * document nodes to their TOC nodes.
 */
private final class DocumentData {
    private final URL _docURL;
    private final String _docType;
    private final Node _documentTree;
    // created on first use by createTocTree()
    private TocTree _tocTree;
    // document Node -> TocTree.TocNode; filled together with _tocTree
    private Hashtable _docTocCorrespondence;

    /**
     * Parses the document at <code>docUrl</code>.
     *
     * @param docUrl  location of the document
     * @param docType document type, used to pick the TOC transform
     * @throws Exception if the document cannot be parsed
     */
    public DocumentData(URL docUrl, String docType) throws Exception {
        _docURL = docUrl;
        _docType = docType;
        _documentTree = parseTargetDocument(docUrl);
    }

    /** Returns the root of the parsed document. */
    public Node getDocumentTree() {
        return _documentTree;
    }

    /**
     * Returns the TOC tree, creating it (and the doc-to-TOC mapping) on
     * the first call.
     */
    public TocTree getTocTree(Names names) throws Exception {
        return _tocTree != null ? _tocTree : createTocTree(names);
    }

    /**
     * Builds the TOC path that corresponds to the document path
     * <code>path</code>.
     */
    public TocTreePath getTocTreePath(String path, Names names)
        throws Exception {
        final TocTree tocTree = getTocTree(names);
        return new TocTreePath(tocTree,
                               getTocTreePathIndexes(path, tocTree));
    }

    /**
     * Fills the TOC related parts of <code>result</code> and determines
     * the subtree to be transformed for <code>request</code>.
     *
     * @return the node addressed by the request's focus path; when that
     *         node is a text node, its nearest ancestor that has a TOC
     *         entry
     * @throws Exception if the focus path cannot be followed, or the
     *                   focus is a text node without any ancestor that
     *                   has a TOC entry
     */
    public Node fill(DocumentRequest request, DocumentFragment result,
                     Names names) throws Exception {
        final TocTree tocTree = getTocTree(names);
        // ----------- TOC ------------------
        if (request.isTocRequested())
            result.setTOC(tocTree);
        // ----------- TOC path ------------------
        // A leading '/' needs no special treatment: StringTokenizer skips
        // leading delimiters and never yields empty tokens.
        final String focusPath = request.getFocus().getCommonPath();
        final IntegerArray array = new IntegerArray();
        Node docNode = walkPath(focusPath, tocTree, array);
        result.setPath(array.toIntArray());
        // ------------ subtree to transform ----------------------
        if (docNode.getType() == Node.TEXT) {
            docNode = docNode.getParent();
            // Stop at the top of the tree: looking up null in the
            // Hashtable would throw a NullPointerException.
            while (docNode != null && getTocNodeForDocNode(docNode) == null)
                docNode = docNode.getParent();
            if (docNode == null)
                throw new Exception("no TOC entry above focus: "
                                    + focusPath);
        }
        return docNode;
    }

    /** Returns the TOC node indexes met while following <code>path</code>. */
    private int[] getTocTreePathIndexes(String path, TocTree tocTree)
        throws Exception {
        final IntegerArray array = new IntegerArray();
        walkPath(path, tocTree, array);
        return array.toIntArray();
    }

    /**
     * Follows <code>path</code> from the document root. The index of the
     * TOC root is appended to <code>indexes</code> first, then the index
     * of the TOC node of every visited document node that has one.
     *
     * @return the document node reached at the end of the path
     */
    private Node walkPath(String path, TocTree tocTree,
                          IntegerArray indexes) throws Exception {
        final StringTokenizer steps = new StringTokenizer(path, "/");
        Node docNode = _documentTree;
        indexes.add(tocTree.getRoot().index());
        while (steps.hasMoreTokens()) {
            docNode = xPathStep(docNode, steps.nextToken());
            final TocTree.TocNode tocNode = getTocNodeForDocNode(docNode);
            if (tocNode != null)
                indexes.add(tocNode.index());
        }
        return docNode;
    }

    /**
     * Creates the TOC tree with the TOC transform of this document's type
     * and records which document node each TOC node stands for.
     */
    private TocTree createTocTree(Names names) throws Exception {
        Transform transform = getTocTransformForType(_docType);
        TocTree tree =
            TocTree.makeTocTree(_documentTree, transform, names);
        _docTocCorrespondence = new Hashtable();
        establishCorrespondence(tree.getRoot());
        return _tocTree = tree;
    }

    /**
     * Returns the TOC node recorded for <code>docNode</code>, or null.
     * Only valid once the TOC tree has been created.
     */
    private TocTree.TocNode getTocNodeForDocNode(Node docNode) {
        return (TocTree.TocNode)_docTocCorrespondence.get(docNode);
    }

    /**
     * Records, for <code>tocNode</code> and recursively for all of its
     * children, the document node its xPath leads to.
     */
    private void establishCorrespondence(TocTree.TocNode tocNode)
        throws Exception {
        final int nChildren = tocNode.getChildCount();
        _docTocCorrespondence.put(followXPath(_documentTree,
                                              tocNode.getXPath()),
                                  tocNode);
        for (int i = 0; i < nChildren; i++)
            establishCorrespondence((TocTree.TocNode)
                                    tocNode.getChildAt(i));
    }
} // end of DocumentData
/**
 * Returns the cached DocumentData for <code>docUrl</code>, creating and
 * caching it when the cache has no entry. On a cache hit
 * <code>docType</code> is not looked at.
 *
 * @param docUrl  location of the document; also the cache key
 * @param docType document type handed to a newly created DocumentData
 * @throws Exception if a new DocumentData cannot be created
 */
private DocumentData getDocumentData(URL docUrl, String docType) throws Exception {
    DocumentData data = (DocumentData)_documentCache.get(docUrl);
    if (data == null) {
        data = new DocumentData(docUrl, docType);
        _documentCache.put(docUrl, data);
    }
    return data;
}
/**
 * Returns the parsed tree of the document at <code>docUrl</code>, going
 * through the document cache.
 *
 * @throws Exception if the document has to be loaded and that fails
 */
private synchronized Node getDocumentTree(URL docUrl, String docType)
    throws Exception {
    final DocumentData data = getDocumentData(docUrl, docType);
    return data.getDocumentTree();
}
/**
 * Returns the TOC tree of the document at <code>docUrl</code>, going
 * through the document cache. This object is passed on as the Names
 * argument.
 *
 * @throws Exception if the document or its TOC cannot be produced
 */
private synchronized TocTree getTocTree(URL docUrl, String docType)
    throws Exception {
    final DocumentData data = getDocumentData(docUrl, docType);
    return data.getTocTree(this);
}
/**
 * Computes the TOC path for a query hit: the hit's document is fetched
 * through the document cache and asked for the TOC path matching the
 * hit's common path.
 *
 * @param queryHit hit supplying document URL, document type and path
 * @throws Exception if the URL is malformed or the document, its TOC or
 *                   the path cannot be resolved
 */
private synchronized TocTreePath getTocTreePath(QueryHitData queryHit)
    throws Exception {
    final URL docUrl = new URL(queryHit.getDocument());
    final DocumentData data =
        getDocumentData(docUrl, queryHit.getDocumentType());
    return data.getTocTreePath(queryHit.getCommonPath(), this);
}
/*
// little function, lots of functionality
private synchronized TocTree getTocTree(URL docUrl) throws Exception {
Object value = _tocTreeCache.get(docUrl);
if (value == null) {
Node docRoot = getDocumentTree(docUrl);
Transform tocTransform = getTocTransformForDoc(docUrl);
TocTree tree = TocTree.makeTocTree(docRoot, tocTransform, this);
TocTree.TocNode tocRoot = tree.getRoot();
_tocTreeCache.put(docUrl, tree);
establishCorrespondence(docRoot, tocRoot);
return tree;
}
else
return (TocTree)value;
}
*/
/*
private void dumpTree(Node root) {
dumpTree(System.out, root, "");
}
private void dumpTree(PrintStream out, Node node, String indent) {
out.print(indent);
out.println(node.toString());
SafeNodeIterator children = node.getChildren(); Node child;
while ((child = children.next()) != null)
dumpTree(out, child, indent + " ");
}
*/
/**
 * Returns the transform stored as <code>fileName</code> in the top-level
 * directory of the document's URL path: for a document at
 * <code>/dir/sub/doc.xml</code> the stylesheet is <code>/dir/</code> +
 * fileName, on the same protocol, host and port.
 *
 * @param docUrl   URL of the document
 * @param fileName stylesheet file name, without directory
 * @throws Exception if the stylesheet cannot be loaded
 */
private Transform getTransformForDoc(URL docUrl, String fileName)
    throws Exception {
    System.out.println(docUrl);
    String docFname = docUrl.getFile();
    System.out.println("fname: " + docFname);
    // position of the slash that ends the first path segment
    int slash = docFname.indexOf('/', 1);
    // A document directly under the root ("/doc.xml") has no such slash.
    // Fall back to "/" so that the file part keeps its leading slash;
    // without it the resulting URL would be malformed.
    String directory = slash >= 0 ? docFname.substring(0, slash + 1) : "/";
    String transFname = directory + fileName;
    URL stylesheetUrl = new URL(docUrl.getProtocol(),
                                docUrl.getHost(),
                                docUrl.getPort(),
                                transFname);
    return getTransform(stylesheetUrl);
}
/**
 * Returns the transform for the stylesheet at <code>stylesheetUrl</code>.
 * Transforms are created once and then served from the transform cache.
 *
 * @param stylesheetUrl location of the stylesheet; may be null
 * @return the transform, or null when <code>stylesheetUrl</code> is null
 * @throws Exception if the stylesheet cannot be fetched or compiled
 */
private synchronized Transform getTransform(URL stylesheetUrl)
    throws Exception {
    if (stylesheetUrl == null)
        return null;
    Object value = _transformCache.get(stylesheetUrl);
    if (value != null)
        return (Transform)value;
    System.out.println("creating transform: " + stylesheetUrl);
    InputStream stylesheetStream = stylesheetUrl.openStream();
    try {
        XmlDocument sheet =
            XmlDocument.createXmlDocument(stylesheetStream, false);
        Transform transform = _transformEngine.createTransform(sheet);
        _transformCache.put(stylesheetUrl, transform);
        return transform;
    }
    finally {
        // the stream (and its connection) was previously never released
        stylesheetStream.close();
    }
}
/**
 * Returns the transform that produces the table of contents for documents
 * of type <code>docType</code>; its URL comes from the catalog.
 *
 * @throws Exception if the URL is malformed or the transform cannot be
 *                   loaded
 */
private Transform getTocTransformForType(String docType) throws Exception {
    final URL stylesheet = new URL(_catalog.getTocTransformUrlString(docType));
    return getTransform(stylesheet);
}
/**
 * Returns the transform that renders documents of type
 * <code>docType</code> as HTML; its URL comes from the catalog.
 *
 * @throws Exception if the URL is malformed or the transform cannot be
 *                   loaded
 */
private Transform getHtmlTransformForType(String docType) throws Exception {
    final URL stylesheet = new URL(_catalog.getToHtmlTransformUrlString(docType));
    return getTransform(stylesheet);
}
/**
 * Registers token number <code>n</code> for <code>key</code> in
 * _tokenNumbers, creating the key's IntegerArray on first use.
 *
 * @param key the node the token belongs to (highlightTree() expects the
 *            keys to be text nodes)
 * @param n   token number within that node
 */
private void addNumber(Object key, int n) {
    IntegerArray numbers = (IntegerArray)_tokenNumbers.get(key);
    if (numbers == null) {
        numbers = new IntegerArray(8);
        _tokenNumbers.put(key, numbers);
    }
    // presumably addNew skips values already present -- verify against
    // IntegerArray
    numbers.addNew(n);
}
/**
 * Hands every substitute element recorded in _substituted to the tree
 * builder's revertToOriginal, in the order in which they were recorded.
 * The list itself is left untouched.
 */
private void revertTree() {
    final Enumeration substitutes = _substituted.elements();
    while (substitutes.hasMoreElements()) {
        final Node substitute = (Node)substitutes.nextElement();
        _treeBuilder.revertToOriginal(substitute);
    }
}
/**
 * Has the tree builder create a text node for <code>text</code>.
 *
 * @param text character data of the new node
 * @return the node created by the tree builder
 */
private Node makeTextNode(String text) {
    final Node created = _treeBuilder.makeTextNode(text);
    return created;
}
/**
 * Has the tree builder create a "HighlightedText" element that is to take
 * the place of <code>textNode</code>.
 *
 * @param textNode      the text node being substituted
 * @param childrenArray children of the new element
 * @return the element created by the tree builder
 */
private Node makeSubstituteElement(Node textNode, Node2[] childrenArray) {
    final Node substitute =
        _treeBuilder.makeSubstituteElement(_HighlightedText_Name,
                                           childrenArray,
                                           textNode);
    return substitute;
}
/**
 * Tells whether <code>text</code> consists of whitespace only between
 * <code>start</code> (inclusive) and <code>limit</code> (exclusive).
 *
 * @return true if every character in the range is whitespace or the range
 *         is empty (<code>start == limit</code>); false otherwise,
 *         including when <code>start</code> is greater than
 *         <code>limit</code>
 */
private boolean isWhitespace(final String text, int start, final int limit) {
    int pos = start;
    for (; pos < limit; pos++) {
        if (!Character.isWhitespace(text.charAt(pos)))
            break;
    }
    // the scan only arrives at 'limit' if nothing stopped it earlier
    return pos == limit;
}
/**
 * Replaces every text node registered in _tokenNumbers by a substitute
 * element in which the registered tokens are bracketed by the
 * start/end highlight marker nodes. Each substitute is remembered in
 * _substituted so that revertTree() can undo the change.
 */
private void highlightTree() {
    for (Enumeration textNodes = _tokenNumbers.keys();
         textNodes.hasMoreElements(); ) {
        final Vector pieces = new Vector();
        final Node textNode = (Node)textNodes.nextElement();
        final IntegerArray wanted = (IntegerArray)_tokenNumbers.get(textNode);
        wanted.sort(); // token numbers in ascending order
        final String text = textNode.getData();
        int tokensRead = 0; // tokens pulled from the tokenizer so far
        int consumed = 0;   // offset in 'text' up to which pieces exist
        _tokenizer.setText(text);
        for (int k = 0; k < wanted.cardinality(); k++) {
            final int target = wanted.at(k);
            // advance to token number 'target'; at least one token is
            // always read
            Token token = _tokenizer.nextToken();
            while (++tokensRead < target)
                token = _tokenizer.nextToken();
            final int from = token.getStart();
            final int to = token.getEnd();
            if (consumed < from) {
                // text between the previous piece and this token; a run
                // of pure whitespace collapses to a single blank
                final String gap;
                if (isWhitespace(text, consumed, from))
                    gap = " ";
                else
                    gap = text.substring(consumed, from);
                pieces.addElement(makeTextNode(gap));
            }
            // the two marker nodes are shared by all highlights
            pieces.addElement(_StartHighlight_Node);
            pieces.addElement(makeTextNode(token.getData()));
            pieces.addElement(_EndHighlight_Node);
            consumed = to;
        }
        // whatever follows the last highlighted token
        if (consumed < text.length())
            pieces.addElement(makeTextNode(text.substring(consumed)));
        final Node2[] pieceArray = new Node2[pieces.size()];
        pieces.toArray(pieceArray);
        final Node replacement = makeSubstituteElement(textNode, pieceArray);
        final Node2 parent = (Node2)textNode.getParent();
        for (int k = 0; k < pieceArray.length; k++)
            pieceArray[k].setParent(replacement);
        // actually substitute
        final int slot = parent.getChildIndex(textNode);
        parent.setChild(slot, replacement);
        ((Node2)replacement).setParent(parent);
        _substituted.addElement(replacement);
    }
}
/**
 * Runs <code>tr</code> over each of <code>nodes</code> in turn, sending
 * all output to <code>res</code>, which is initialized before the first
 * and finished after the last node. Does nothing at all when
 * <code>tr</code> is null.
 *
 * @throws Exception if the result adapter or the transform fails
 */
private synchronized void transform(Transform tr, Node[] nodes,
                                    ResultAdapter res)
    throws Exception {
    if (tr == null)
        return;
    res.init();
    int next = 0;
    while (next < nodes.length) {
        tr.transform(nodes[next], res);
        next++;
    }
    res.finish();
}
/**
 * Produces the HTML fragment for a document request.
 * <ol>
 * <li>resolves the requested document (servlet-style addresses are mapped
 *     to a file below <code>Http</code>) and fetches its DocumentData;</li>
 * <li>lets the DocumentData fill in TOC and TOC path and pick the subtree
 *     to render;</li>
 * <li>registers the tokens of every locator whose path starts with the
 *     focus path (all locators if the focus is "doc");</li>
 * <li>highlights those tokens in the tree, transforms the subtree to HTML
 *     and stores bytes and highlight count in the result.</li>
 * </ol>
 * The tree modifications and the highlight bookkeeping are undone in any
 * case before the method returns or throws.
 *
 * @param request the document request
 * @return the filled-in fragment
 * @throws Exception whatever went wrong; the stack trace is printed
 *                   before the exception is rethrown
 */
public synchronized DocumentFragment getDocumentFragment(DocumentRequest
                                                         request)
    throws Exception {
    final long startedAt = System.currentTimeMillis();
    try {
        String docAddress = request.getDocument();
        final String docType = request.getDocumentType();
        System.out.println("docUrlString = " + docAddress);
        System.out.println("docType = " + docType);
        // the prefix for the servlet version
        final String servletPrefix = "/servlet/document?doc=";
        //!!! still a hack
        if (docAddress.startsWith(servletPrefix)) {
            final String className =
                docAddress.substring(servletPrefix.length());
            System.out.println(className);
            docAddress = Http + "Java2JDK1.3/" + "/" + className + ".xml";
        }
        final URL docUrl = new URL(docAddress);
        System.out.println("\"" + docAddress + "\"");
        final DocumentData docData = getDocumentData(docUrl, docType);

        // TOC, TOC path and the subtree that will be rendered
        final DocumentFragment fragment = new DocumentFragment();
        final Node subtree = docData.fill(request, fragment, this);

        // collect the tokens to highlight: locators are matched against
        // the focus by comparing paths
        // !!! repeated in DocumentData
        String focusPath = request.getFocus().getCommonPath();
        if (focusPath.startsWith("/"))
            focusPath = focusPath.substring(1);
        for (int i = 0; i < request.size(); i++) {
            final MultiTokenLocator locator = request.getLocator(i);
            final String locatorPath = locator.getCommonPath();
            if (!locatorPath.startsWith(focusPath)
                && !focusPath.equals("doc"))
                continue; // locator lies outside the focus
            final int termCount = locator.getNumberOfTerms();
            final Node locatorNode =
                followXPath(docData.getDocumentTree(), locatorPath);
            for (int j = 0; j < termCount; j++) {
                if (locator.getTerm(j) == null)
                    continue;
                final Node hitNode =
                    followXPath(locatorNode, locator.getPath(j));
                addNumber(hitNode, locator.getTokenNumber(j));
            }
        }

        // render
        final ByteArrayOutputStream htmlBuffer =
            new ByteArrayOutputStream(1024*10);
        _html.reset();
        _html.setOutputStream(htmlBuffer);
        highlightTree();
        transform(getHtmlTransformForType(docType),
                  new Node[] { subtree }, _html);
        final byte[] html = htmlBuffer.toByteArray();
        System.out.println(html.length + " HTML bytes");
        fragment.setHTML(html);
        fragment.setNumberOfHighlights(_html.getNumberOfHighlights());
        return fragment;
    }
    catch (Exception failure) {
        failure.printStackTrace();
        throw failure;
    }
    finally {
        // restore the shared document tree and reset the highlight state
        revertTree();
        _tokenNumbers.clear();
        _substituted.setSize(0);
        System.out.println((System.currentTimeMillis() - startedAt)
                           +" msec getDocumentFragment");
    }
}
/**
 * Builds the collection model for documents of type <code>docType</code>:
 * the model document named by the catalog is loaded (through the document
 * cache, with an empty document type) and turned into a TocTree by the
 * transform at <code>Http + "identity.xsl"</code>.
 *
 * @param docType document type whose model is wanted
 * @return a new CollectionModel holding the tree
 * @throws Exception whatever went wrong; it is reported on System.err
 *                   before being rethrown
 */
public synchronized CollectionModel getCollectionModel(String docType)
    throws Exception {
    try {
        final Node modelRoot =
            getDocumentTree(new URL(_catalog.getModel(docType)), "");
        final Transform identity =
            getTransform(new URL(Http + "identity.xsl"));
        final TocTree tree = TocTree.makeTocTree(modelRoot, identity, this);
        final CollectionModel model = new CollectionModel();
        model.setTree(tree);
        return model;
    }
    catch (Exception failure) {
        System.err.println("getCollectionModel ");
        failure.printStackTrace();
        throw failure;
    }
}
}