// Source Code of com.sun.xmlsearch.xml.qe.DocumentFragmentServerImpl$DocumentData

/*************************************************************************
 *
 *  OpenOffice.org - a multi-platform office productivity suite
 *
 *  $RCSfile: DocumentFragmentServerImpl.java,v $
 *
 *  $Revision: 1.2 $
 *
 *  last change: $Author: rt $ $Date: 2005/09/09 16:56:56 $
 *
 *  The Contents of this file are made available subject to
 *  the terms of GNU Lesser General Public License Version 2.1.
 *
 *
 *    GNU Lesser General Public License Version 2.1
 *    =============================================
 *    Copyright 2005 by Sun Microsystems, Inc.
 *    901 San Antonio Road, Palo Alto, CA 94303, USA
 *
 *    This library is free software; you can redistribute it and/or
 *    modify it under the terms of the GNU Lesser General Public
 *    License version 2.1, as published by the Free Software Foundation.
 *
 *    This library is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *    Lesser General Public License for more details.
 *
 *    You should have received a copy of the GNU Lesser General Public
 *    License along with this library; if not, write to the Free Software
 *    Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 *    MA  02111-1307  USA
 *
 ************************************************************************/


package com.sun.xmlsearch.xml.qe;


import java.io.*;
import java.util.*;
import java.net.URL;
import java.net.MalformedURLException;
import java.text.MessageFormat;


import com.jclark.xsl.om.*;
import com.jclark.xsl.tr.Result;
import com.jclark.xsl.tr.OutputMethod;
import com.jclark.xsl.tr.LoadContext;


import com.jclark.xsl.dom.Transform;
import com.jclark.xsl.dom.TransformEngine;
import com.jclark.xsl.dom.TransformException;
import com.jclark.xsl.dom.XSLTransformEngine;


import com.sun.xmlsearch.util.*;
import com.sun.xmlsearch.tree.*;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import com.sun.xml.tree.XmlDocument;
import javax.swing.tree.TreeNode;


public final class DocumentFragmentServerImpl
    implements DocumentFragmentServer, Names {
  
    final class MyXslEngine extends XSLTransformEngine {
  public Node load(URL url,
       int documentIndex,
       LoadContext context,
       NameTable nameTable) throws XSLException {
      System.out.println("loading " + url.toString());
      try {
    return parseTargetDocument(url);
      }
      catch (Exception e) {
    throw new XSLException(e);
      }
  }
    }
  
    // Document type catalog; assigned in the constructor and queried for
    // the TOC transform, HTML transform and model URL of a document type.
    private DocTypeCatalog _catalog;
    // Local docs mount point; this default is replaced by args[0] in the
    // constructor.
    private String Dir = System.getProperty("XMLSEARCH")+"/samples/docs/";
    // HTTP address for the same documents; this default is replaced by
    // args[1] in the constructor.  Used as the prefix for "default.xsl" and
    // "identity.xsl" and for rewritten servlet document URLs.
    private String Http = "http://localhost:8084/";
  
    // Parses a single XPath step of the form  type[n-of-type]  (see xPathStep).
    // NOTE(review): shared by all instances; java.text.MessageFormat is not
    // synchronized.
    private static final MessageFormat StepFormat =
  new MessageFormat("{0}[{1,number,integer}]");


    // Tokenizer applied to the text of nodes being highlighted.
    private Tokenizer _tokenizer = new SimpleTokenizer();
    // Builds/parses document trees and creates the substitute nodes used
    // for highlighting; created in initXmlProcessor().
    private TreeBuilder _treeBuilder;
    // Created in initXmlProcessor(); not read anywhere else in this class.
    private TreeResult  _treeResult;
    // Transformation result sink producing the HTML bytes of a fragment.
    private HtmlContent _html;


    // Maps a document URL to its DocumentData (see getDocumentData).
    // NOTE(review): the meaning of the 90000 argument depends on Cache - confirm.
    private Cache _documentCache = new Cache(90000);
    // Maps a stylesheet URL to its Transform (see getTransform).
    // _transformCache could also be a Cache if that makes sense
    private Hashtable _transformCache = new Hashtable();


    // Engine used to create every Transform; created in initXmlProcessor().
    private MyXslEngine _transformEngine;
    // Transform built from Http + "default.xsl" in initXmlProcessor().
    private Transform _defaultTransform;


    // state of the modified tree
    // text node -> IntegerArray of the token numbers to highlight in it
    private Hashtable _tokenNumbers = new Hashtable();
    // substitute elements installed by highlightTree(); undone by revertTree()
    private Vector _substituted = new Vector();


    // Element names and shared empty marker elements used when building
    // highlighted replacements for text nodes; set up in initXmlProcessor().
    private Name _StartHighlight_Name;
    private Name _EndHighlight_Name;
  
    private Name _HighlightedText_Name;
    private Node _StartHighlight_Node;
    private Node _EndHighlight_Node;


    public DocumentFragmentServerImpl(String[] args) throws Exception {
  Dir  = args[0];    // local docs mount point
  Http = args[1];    // http address for the same
  initXmlProcessor();
  _html = new HtmlContent(_treeBuilder);
  
  URL catalog = new URL("http://localhost:8084/DocTypeCatalog.cfg");
  _catalog = new DocTypeCatalog(catalog);
    }
  
    private final void initXmlProcessor() throws Exception {
  _transformEngine = new MyXslEngine();
    
  URL stylesheetUrl = new URL(Http + "default.xsl");
  InputStream stylesheetStream = stylesheetUrl.openStream();
  XmlDocument sheet = XmlDocument.createXmlDocument(stylesheetStream,
                false);
  _defaultTransform = _transformEngine.createTransform(sheet);
  _treeBuilder = new TreeBuilder(_transformEngine.getNameTable());
  _treeResult = new TreeResult(_transformEngine.getNameTable());


  _StartHighlight_Name = getElementName("StartHighlight");
  _EndHighlight_Name = getElementName("EndHighlight");


  _HighlightedText_Name = getElementName("HighlightedText");
  _StartHighlight_Node =
      _treeBuilder.makeEmptyElement(_StartHighlight_Name);
  _EndHighlight_Node = _treeBuilder.makeEmptyElement(_EndHighlight_Name);
    }


    public Node followXPath(Node current, String xPath) throws Exception {
  StringTokenizer steps = new StringTokenizer(xPath, "/");
  while (steps.hasMoreTokens())
      current = xPathStep(current, steps.nextToken());
  return current;
    }


    public final Name getElementName(String elementName) throws XSLException {
  return _treeBuilder.getElementName(elementName);
    }


    public final Name getAttributeName(String elementName) throws XSLException {
  return _treeBuilder.getAttributeName(elementName);
    }


    public Node[] expandXPath(Node current, String xPath) throws Exception {
  StringTokenizer steps = new StringTokenizer(xPath, "/");
  Node[] result = new Node[steps.countTokens() + 1];
  int index = 0;
  result[0] = current;
  while (steps.hasMoreTokens()) {
      current = xPathStep(current, steps.nextToken());
      result[++index] = current;
  }
  return result;
    }


    private Node xPathStep(Node start, String step) throws Exception {
  // !!! optimize it; maybe use a Hashtable mapping steps to actions
  if (step.equals("doc")) {
      // find root
      while (start.getParent() != null)
    start = start.getParent();
      return start;
  }
  else {
      // form:  type[n-of-type]
      Object[] parts = StepFormat.parse(step);
      final String name = (String)parts[0];
      int n = ((Long)parts[1]).intValue();
      SafeNodeIterator children = start.getChildren();
      Node child;


      if (name.equals("text()")) {
        // find 'n'th text node child
    while ((child = children.next()) != null)
        if (child.getType() == Node.TEXT && (--n == 0))
      return child;  // no more steps expected
      }
      else {
    // get the interned 'Name'
    NamespacePrefixMap npm = start.getNamespacePrefixMap();
    Name elementName = npm.expandElementTypeName(name, start);
    // find 'n'th child with tagName 'name'
    while ((child = children.next()) != null)
        if (child.getType() == Node.ELEMENT &&
      child.getName() == elementName && (--n == 0))
      return child;
      }
  }
  // if not returned above
  throw new Exception("mis-step in xPath: " + step + " " + start.getName());
    }


    private Node parseTargetDocument(URL docUrl) throws Exception {
  return _treeBuilder.getRoot(docUrl);
    }
  
    private Node parseTargetDocument(InputSource source) throws Exception {
  return _treeBuilder.getRoot(source);
    }


    private Node parseInputSource(InputSource in) throws Exception {
  return _treeBuilder.getRoot(in);
    }


    private final class DocumentData {
  private final URL     _docURL;
  private final String  _docType;
  private final Node    _documentTree;
  private TocTree       _tocTree;
  private Hashtable     _docTocCorrespondence;
    
  public DocumentData(URL docUrl, String docType) throws Exception {
      _docURL = docUrl;
      _docType = docType;
      _documentTree = parseTargetDocument(docUrl);
  }


  public Node getDocumentTree() {
      return _documentTree;
  }


  public TocTree getTocTree(Names names) throws Exception {
      return _tocTree != null ? _tocTree : createTocTree(names);
  }
    
  public TocTreePath getTocTreePath(String path, Names names)
      throws Exception {
      final TocTree tocTree = getTocTree(names);
      return new TocTreePath(tocTree,
           getTocTreePathIndexes(path,
               tocTree));
  }


  public Node fill(DocumentRequest request, DocumentFragment result,
       Names names) throws Exception {
      final TocTree tocTree = getTocTree(names);
      // ----------- TOC ------------------
      if (request.isTocRequested())
    result.setTOC(tocTree);
       
      // ----------- TOC path ------------------
      final IntegerArray array = new IntegerArray();
      String focusPath = request.getFocus().getCommonPath();
      if (focusPath.indexOf('/') == 0)
    focusPath = focusPath.substring(1);
      StringTokenizer steps = new StringTokenizer(focusPath, "/");
      Node docNode = _documentTree;
      TocTree.TocNode tocNode = tocTree.getRoot();
      array.add(tocNode.index());
      while (steps.hasMoreTokens()) {
    docNode = xPathStep(docNode, steps.nextToken());
    tocNode = getTocNodeForDocNode(docNode);
    if (tocNode != null)
        array.add(tocNode.index());
      }
      result.setPath(array.toIntArray());
      // ------------ subtree to transform ----------------------
      if (docNode.getType() == Node.TEXT) {
    docNode = docNode.getParent();
    while (getTocNodeForDocNode(docNode) == null)
        docNode = docNode.getParent();
      }
      return docNode;
  }
  
  private int[] getTocTreePathIndexes(String path, TocTree tocTree)
      throws Exception {
    final IntegerArray array = new IntegerArray();
    StringTokenizer steps = new StringTokenizer(path, "/");
    Node docNode = _documentTree;
    TocTree.TocNode tocNode = tocTree.getRoot();
    array.add(tocNode.index());
    while (steps.hasMoreTokens()) {
        docNode = xPathStep(docNode, steps.nextToken());
        tocNode = getTocNodeForDocNode(docNode);
        if (tocNode != null)
      array.add(tocNode.index());
    }
    return array.toIntArray();
      }
    
  private TocTree createTocTree(Names names) throws Exception {
      Transform transform = getTocTransformForType(_docType);
      TocTree tree =
    TocTree.makeTocTree(_documentTree, transform, names);
      _docTocCorrespondence = new Hashtable();
      establishCorrespondence(tree.getRoot());
      return _tocTree = tree;
  }


  private TocTree.TocNode getTocNodeForDocNode(Node docNode) {
      return (TocTree.TocNode)_docTocCorrespondence.get(docNode);
  }
    
  private void establishCorrespondence(TocTree.TocNode tocNode)
      throws Exception {
      //      System.out.println("tocNode = " + tocNode.toString());
      final int nChildren = tocNode.getChildCount();
      _docTocCorrespondence.put(followXPath(_documentTree,
              tocNode.getXPath()),
              tocNode);
      for (int i = 0; i < nChildren; i++)
    establishCorrespondence((TocTree.TocNode)
          tocNode.getChildAt(i));
  }
    } // end of DocumentData


    private DocumentData getDocumentData(URL docUrl, String docType) throws Exception {
  Object value = _documentCache.get(docUrl);
  if (value != null)
      return (DocumentData)value;
  else {
      DocumentData newData = new DocumentData(docUrl, docType);
      _documentCache.put(docUrl, newData);
      return newData;
  }
    }


    private synchronized Node getDocumentTree(URL docUrl, String docType)
  throws Exception {
  return getDocumentData(docUrl, docType).getDocumentTree();
    }


    private synchronized TocTree getTocTree(URL docUrl, String docType)
  throws Exception {
  return getDocumentData(docUrl, docType).getTocTree(this);
    }


    private synchronized TocTreePath getTocTreePath(QueryHitData queryHit)
  throws Exception {
  URL docUrl = new URL(queryHit.getDocument());
  return getDocumentData(docUrl, queryHit.getDocumentType())
      .getTocTreePath(queryHit.getCommonPath(), this);
  /*
    StringTokenizer steps
    = new StringTokenizer(queryHit.getCommonPath(), "/");
    // root of TOC
    TocTree tocTree = getTocTree(docUrl);
    TocTree.TocNode tocNode = tocTree.getRoot();
    // root of document tree
    Node docNode = getDocumentTree(docUrl);
    IntegerArray array = new IntegerArray();
    array.add(tocNode.index());
    while (steps.hasMoreTokens()) {
    docNode = xPathStep(docNode, steps.nextToken());
    tocNode = getTocNodeForDocNode(docNode);
    if (tocNode != null)
    array.add(tocNode.index());
    }
    return new TocTreePath(tocTree, array.toIntArray());
  */
    }




    /*
      // little function, lots of functionality
      private synchronized TocTree getTocTree(URL docUrl) throws Exception {
      Object value = _tocTreeCache.get(docUrl);
      if (value == null) {
      Node docRoot = getDocumentTree(docUrl);
      Transform tocTransform = getTocTransformForDoc(docUrl);
      TocTree tree = TocTree.makeTocTree(docRoot, tocTransform, this);
      TocTree.TocNode tocRoot = tree.getRoot();
      _tocTreeCache.put(docUrl, tree);
      establishCorrespondence(docRoot, tocRoot);
      return tree;
      }
      else
      return (TocTree)value;
      }
    */


    /*
      private void dumpTree(Node root) {
      dumpTree(System.out, root, "");
      }


      private void dumpTree(PrintStream out, Node node, String indent) {
      out.print(indent);
      out.println(node.toString());
      SafeNodeIterator children = node.getChildren(); Node child;
      while ((child = children.next()) != null)
      dumpTree(out, child, indent + "    ");
      }
    */


    private Transform getTransformForDoc(URL docUrl, String fileName)
  throws Exception {
  System.out.println(docUrl);
  String docFname = docUrl.getFile();
  System.out.println("fname: " + docFname);
  int slash = docFname.indexOf('/', 1);
  String transFname = docFname.substring(0, slash + 1) + fileName;
  URL stylesheetUrl = new URL(docUrl.getProtocol(),
            docUrl.getHost(),
            docUrl.getPort(),
            transFname);
  return getTransform(stylesheetUrl);
    }


    private synchronized Transform getTransform(URL stylesheetUrl)
  throws Exception {
  if (stylesheetUrl != null) {
      Object value = _transformCache.get(stylesheetUrl);
      if (value != null)
    return (Transform)value;
      else {
    System.out.println("creating transform: " + stylesheetUrl);
    InputStream stylesheetStream = stylesheetUrl.openStream();
    XmlDocument sheet =
        XmlDocument.createXmlDocument(stylesheetStream, false);
    Transform transform = _transformEngine.createTransform(sheet);
    _transformCache.put(stylesheetUrl, transform);
    return transform;
      }
  }
  else
      return null;
    }




    private Transform getTocTransformForType(String docType) throws Exception {
  return getTransform(new URL(_catalog.getTocTransformUrlString(docType)));
    }
  
    private Transform getHtmlTransformForType(String docType) throws Exception {
  return getTransform(new URL(_catalog.getToHtmlTransformUrlString(docType)));
    }


    private void addNumber(Object key, int n) {
  //    System.out.println("addNumber " + key + " " + n);
  IntegerArray array = (IntegerArray)_tokenNumbers.get(key);
  if (array == null)
      _tokenNumbers.put(key, array = new IntegerArray(8));
  array.addNew(n);
    }
  
    private void revertTree() {
  for (int i = 0; i < _substituted.size(); i++)
      _treeBuilder.revertToOriginal((Node)_substituted.elementAt(i));
    }


    private Node makeTextNode(String text) {
  return _treeBuilder.makeTextNode(text);
    }


    private Node makeSubstituteElement(Node textNode, Node2[] childrenArray) {
  return _treeBuilder.makeSubstituteElement(_HighlightedText_Name,
              childrenArray,
              textNode);
    }


    private boolean isWhitespace(final String text, int start, final int limit) {
  while (start < limit && Character.isWhitespace(text.charAt(start)))
      ++start;
  return start == limit;
    }
    
    /**
     * Replaces every text node recorded in <code>_tokenNumbers</code> by a
     * substitute element in which the recorded tokens are marked.
     * <p>
     * For each text node the children of the substitute are, in order: the
     * text between highlighted tokens (or the literal <code>"&nbsp;"</code>
     * when that gap is whitespace only), and for each recorded token the
     * shared start marker, a text node with the token's data and the
     * shared end marker; any text after the last token is appended
     * unchanged.  The substitute is installed at the text node's position
     * in its parent and remembered in <code>_substituted</code>.
     */
    private void highlightTree() {
  Enumeration keys = _tokenNumbers.keys();
  while (keys.hasMoreElements()) {
      Vector children = new Vector();
      Node textNode = (Node)keys.nextElement();
      IntegerArray numbers = (IntegerArray)_tokenNumbers.get(textNode);
      numbers.sort();    // word numbers in ascending order
      String text = textNode.getData();
      // count of tokens read from the tokenizer so far for this text node
      int tokenNumber = 0;
      // end offset of the last highlighted token (0 before the first one)
      int lastOffset = 0;
      // select tokenizers
      _tokenizer.setText(text);
      
      for (int i = 0; i < numbers.cardinality(); i++) {
    int n = numbers.at(i);
    Token token;
    // read tokens until the running count reaches n; the count is
    // pre-incremented from 0, so the first token read counts as number 1
    do {
        token = _tokenizer.nextToken();
    }
    while (++tokenNumber < n);
    final int start = token.getStart();
    final int end   = token.getEnd();
    // text between the previous highlight and this token
    if (lastOffset < start)
        children
      .addElement(makeTextNode(isWhitespace(text,
                    lastOffset,
                    start)
             ? "&nbsp;"
             : text.substring(lastOffset,
                  start)));
    children.addElement(_StartHighlight_Node);
    children.addElement(makeTextNode(token.getData()));
    children.addElement(_EndHighlight_Node);
    lastOffset = end;
      }
      // remaining text after the last highlighted token
      if (lastOffset < text.length())
    children.addElement(makeTextNode(text.substring(lastOffset)));
      
      final Node2[] childrenArray = new Node2[children.size()];
      children.toArray(childrenArray);
      Node highlighted = makeSubstituteElement(textNode, childrenArray);
      Node2 parent = (Node2)textNode.getParent();
      // NOTE(review): the two marker nodes are the same objects for every
      // text node, so their parent is overwritten on each pass.
      for (int i = 0; i < childrenArray.length; i++)
    ((Node2)childrenArray[i]).setParent(highlighted);
      // actually substitute
      int index = parent.getChildIndex(textNode);
      parent.setChild(index, highlighted);
      ((Node2)highlighted).setParent(parent);
      // remembered so that revertTree() can undo the substitution
      _substituted.addElement(highlighted);
  }
    }


    private synchronized void transform(Transform tr, Node[] nodes,
          ResultAdapter res)
  throws Exception {
  if (tr != null) {
      res.init();
      for (int i = 0; i < nodes.length; i++)
    tr.transform(nodes[i], res);
      res.finish();
  }
    }
  
    /**
     * Builds the document fragment answering <code>request</code>.
     * <p>
     * Steps, in order: resolve the document URL (a URL starting with the
     * servlet prefix is rewritten to a location under <code>Http</code>);
     * obtain the document's <code>DocumentData</code>; let it fill in the
     * TOC and TOC path and pick the subtree to transform; record the token
     * numbers of every locator whose common path starts with the focus
     * path (or of all locators when the focus path is <code>doc</code>);
     * substitute highlighted elements into the tree; run the to-HTML
     * transform of the document type over the subtree; store the HTML
     * bytes and the number of highlights in the result.
     * <p>
     * Whether or not this succeeds, the tree substitutions are reverted
     * and the highlighting state is cleared before returning.
     *
     * @param request the document, its type, the focus and the locators
     * @return the filled fragment
     * @throws Exception any failure; its stack trace is printed before it
     *         is rethrown
     */
    public synchronized DocumentFragment getDocumentFragment(DocumentRequest
                   request)
  throws Exception {
  long start = System.currentTimeMillis();
  // !!! for testing
  /*
    _transformCache.clear();
    _docTocCorrespondence.clear();
    _docTreeCache.clear();
    _tocTreeCache.clear();
  */
      
  try {
      String docUrlString = request.getDocument();
      final String docType = request.getDocumentType();


      System.out.println("docUrlString = " + docUrlString);
      System.out.println("docType = " + docType);
      


      // the prefix for the servlet version
      final String prefix    = "/servlet/document?doc=";
      
      //!!! still a hack
      // NOTE(review): classFile already begins with '/', so the rewritten
      // URL contains "Java2JDK1.3//" - confirm the server accepts that.
      if (docUrlString.startsWith(prefix)) {
    String className = docUrlString.substring(prefix.length());
    System.out.println(className);
    String classFile = "/" + className + ".xml";
    docUrlString = Http + "Java2JDK1.3/" + classFile;
      }
      /*
        for (int i = 0; i < JavaDirs.length; i++)
        if ((new File(Dir + JavaDirs[i] + classFile)).exists()) {
        docUrlString = Http + JavaDirs[i] + classFile;
        break;
        }
        }
        else if (docUrlString.startsWith(manual)) {
        String pageName = docUrlString.substring(manual.length());
        int dash = pageName.lastIndexOf('-');
        String title = pageName
        .substring(0,dash)
        .toLowerCase()
        .replace('-','_');
        String ext = pageName.substring(dash + 1).toLowerCase();
        String filename = Dir+"xman/sman"+ext+'/'+title+'.'+ext+".xml";
        System.out.println("filename: " + filename);
        if ((new File(filename)).exists())
        docUrlString = Http+"xman/sman"+ext+'/'+title+'.'+ext+".xml";
        }
      */
      
      final URL docUrl = new URL(docUrlString);
      System.out.println("\""+docUrlString+"\"");
      final DocumentData docData = getDocumentData(docUrl, docType);
      
      // root of TOC
      /*
        final TocTree tocTree = getTocTree(docUrl);
        TocTree.TocNode tocNode = tocTree.getRoot();
      */
      DocumentFragment result = new DocumentFragment();
      // sets TOC and TOC path on result; returns the subtree to transform
      Node subtree = docData.fill(request, result, this);
      /*
        if (request.isTocRequested())
        result.setTOC(tocTree);
        final IntegerArray array = new IntegerArray();
        array.add(tocNode.index());
        String focusPath = request.getFocus().getCommonPath();
        if (focusPath.indexOf('/') == 0)
        focusPath = focusPath.substring(1);
        
        StringTokenizer steps = new StringTokenizer(focusPath, "/");
        final Node docRoot = getDocumentTree(docUrl);
        Node docNode = docRoot;
        while (steps.hasMoreTokens()) {
        docNode = xPathStep(docNode, steps.nextToken());
        tocNode = getTocNodeForDocNode(docNode);
        if (tocNode != null)
        array.add(tocNode.index());
        }
        result.setPath(array.toIntArray());
        result.setPath(docData.getTocTreePathIndexes(focusPath, this));
      */
      
      // docNode is selected for transformation
      // find highlighted nodes that are its descendants
      // ... by xPath matching


      // !!! repeated in DocumentData
      String focusPath = request.getFocus().getCommonPath();
      if (focusPath.indexOf('/') == 0)
    focusPath = focusPath.substring(1);
  
      // NOTE(review): the match below is a plain string prefix test, so a
      // focus ending in "[1]" also matches locator paths continuing "[1x]".
      for (int i = 0; i < request.size(); i++) {
    MultiTokenLocator loc = request.getLocator(i);
    String locPath = loc.getCommonPath();
    //  System.out.println("locPath = " + locPath);
    if (locPath.startsWith(focusPath) || focusPath.equals("doc")) {
        //    System.out.println("matches");
        int nTerms = loc.getNumberOfTerms();
        Node locNode = followXPath(docData.getDocumentTree(),
                 locPath);
        // record, per text node, the number of each token to highlight
        for (int j = 0; j < nTerms; j++)
      if (loc.getTerm(j) != null)
          addNumber(followXPath(locNode, loc.getPath(j)),
              loc.getTokenNumber(j));
    }
      }


      ByteArrayOutputStream out = new ByteArrayOutputStream(1024*10);


      _html.reset();
      _html.setOutputStream(out);
  
      /*
        Node subtree = docNode;
        if (subtree.getType() == Node.TEXT) {
        subtree = subtree.getParent();
        while (getTocNodeForDocNode(subtree) == null)
        subtree = subtree.getParent();
        }
      */
      //System.out.println("subtree " + subtree.getName()
      //.getLocalPart());


      // substitute highlighted elements, then render the subtree as HTML
      highlightTree();
      transform(getHtmlTransformForType(docType),
          new Node[] { subtree }, _html);
  
      byte[] bytes = out.toByteArray();
      System.out.println(bytes.length + " HTML bytes");
      result.setHTML(bytes);
      result.setNumberOfHighlights(_html.getNumberOfHighlights());
      /*
        OutputStream outf = new FileOutputStream("/tmp/test.html");
        outf.write(bytes);
        outf.close();
      */
      return result;
  }
  catch (Exception e) {
      e.printStackTrace();
      throw e;
  }
  finally {
      // The document tree is held by the cached DocumentData, so the
      // substitutions must be undone and the per-request state cleared
      // whether or not the request succeeded.
      revertTree();
      _tokenNumbers.clear();
      _substituted.setSize(0);
      System.out.println((System.currentTimeMillis() - start)
             +" msec getDocumentFragment");
  }
    }
  
    public synchronized CollectionModel getCollectionModel(String docType)
  throws Exception {
  try {
      URL modelUrl = new URL(_catalog.getModel(docType));
      Node modelRoot = getDocumentTree(modelUrl, "");
      URL identityTransfUrl = new URL(Http + "identity.xsl");
      Transform tocTransform = getTransform(identityTransfUrl);
      TocTree tree = TocTree.makeTocTree(modelRoot, tocTransform, this);
      CollectionModel model = new CollectionModel();
      model.setTree(tree);
      return model;
  }
  catch (Exception e) {
      System.err.println("getCollectionModel ");
      e.printStackTrace();
      throw e;
  }
    }
}
// Source Code of com.sun.xmlsearch.xml.qe.DocumentFragmentServerImpl$DocumentData

// Related Classes of com.sun.xmlsearch.xml.qe.DocumentFragmentServerImpl$DocumentData