/*************************************************************************
*
* $RCSfile: XmlIndexer.java,v $
*
* $Revision: 1.1 $
*
* last change: $Author: abi $ $Date: 2000/11/30 18:03:48 $
*
* The Contents of this file are made available subject to the terms of
* either of the following licenses
*
* - GNU Lesser General Public License Version 2.1
* - Sun Industry Standards Source License Version 1.1
*
* Sun Microsystems Inc., October, 2000
*
* GNU Lesser General Public License Version 2.1
* =============================================
* Copyright 2000 by Sun Microsystems, Inc.
* 901 San Antonio Road, Palo Alto, CA 94303, USA
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
*
* Sun Industry Standards Source License Version 1.1
* =================================================
* The contents of this file are subject to the Sun Industry Standards
* Source License Version 1.1 (the "License"); You may not use this file
* except in compliance with the License. You may obtain a copy of the
* License at http://www.openoffice.org/license.html.
*
* Software provided under this License is provided on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING,
* WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
* MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
* See the License for the specific provisions governing your rights and
* obligations concerning the Software.
*
* The Initial Developer of the Original Code is: Sun Microsystems, Inc.
*
* Copyright: 2000 by Sun Microsystems, Inc.
*
* All Rights Reserved.
*
* Contributor(s): _______________________________________
*
*
************************************************************************/
package com.sun.xmlsearch.xml.indexer;
import java.net.URL;
import java.net.MalformedURLException;
import java.io.*;
import java.util.*;
import com.sun.xml.tree.XmlDocument;
import com.sun.xml.parser.Resolver;
import org.w3c.dom.*;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import com.sun.xmlsearch.util.PrefixTranslator;
import com.sun.xmlsearch.util.Configuration;
import com.sun.xmlsearch.util.ExtensibleURLStreamHandlerFactory;
/**
 * Driver that builds an XML search index as directed by a configuration
 * element.
 *
 * <p>The configuration element supplies the index name, an optional action
 * ("create" clears the index first), a transform and an optional transform
 * location. Each nested <code>IndexDirectory</code> element names a directory
 * whose files (matching an extension) are handed to an
 * {@link XmlIndexBuilder} for indexing.
 */
public class XmlIndexer {

    /** Extension used when an IndexDirectory element does not specify one. */
    private static final String DEFAULT_EXTENSION = ".xml";

    /** When true, progress and conflict diagnostics are printed. */
    private boolean _verbose = true;

    /**
     * Holds the state derived from one configuration element and drives the
     * index builder with it.
     */
    private final class ConfigurationData {
        private Element _root;
        private Hashtable _toAdd = new Hashtable();
        private Hashtable _toRefresh = new Hashtable();
        private Hashtable _toRemove = new Hashtable();

        /**
         * Reads the index settings from <code>configElement</code>, prepares
         * an {@link XmlIndexBuilder} and indexes all configured directories.
         *
         * @param configElement element carrying the "index", "action",
         *        "transform" and "transformLocation" attributes
         * @throws Exception propagated from the builder or from indexing
         */
        public void process(Element configElement) throws Exception {
            _root = configElement;
            String indexName = configElement.getAttribute("index");
            String action = configElement.getAttribute("action");
            String transform = configElement.getAttribute("transform");
            String transfLoc = configElement.getAttribute("transformLocation");
            XmlIndexBuilder builder = new XmlIndexBuilder(indexName);
            if (transfLoc.length() > 0) {
                builder.setTransformLocation(transfLoc);
            }
            if ("create".equals(action)) {
                builder.clearIndex();
            }
            if (builder.init(transform)) {
                processDirectories(builder);
                /* !!! commented out for demo
                prepareSets(configElement);
                updateIndex(builder);
                */
                builder.close();
            }
        }

        /**
         * Indexes the files of every <code>IndexDirectory</code> element
         * below the configuration root.
         *
         * <p>A prefix translator built from the element is installed on the
         * builder while that directory's files are indexed and reset to
         * <code>null</code> afterwards.
         *
         * @param builder initialized builder that receives the documents
         * @throws Exception propagated from the builder
         */
        private void processDirectories(XmlIndexBuilder builder) throws Exception {
            NodeList list = _root.getElementsByTagName("IndexDirectory");
            for (int i = 0; i < list.getLength(); i++) {
                Element idEl = (Element) list.item(i);
                String dirName = idEl.getAttribute("directory");
                String extension = idEl.getAttribute("extension");
                // DOM getAttribute() yields "" (not null) for a missing
                // attribute; an empty suffix would match every file name,
                // so fall back to the default in both cases.
                if (extension == null || extension.length() == 0) {
                    extension = DEFAULT_EXTENSION;
                }
                // NOTE: the "recurse" attribute is not honored; the search
                // below always descends into sub-directories.
                Vector files = findXmlFiles(dirName, extension);
                if (files.size() > 0) {
                    String[] translations =
                        Configuration.processPrefixTranslations(idEl);
                    PrefixTranslator translator =
                        PrefixTranslator.makePrefixTranslator(translations);
                    builder.setPrefixTranslator(translator);
                    Enumeration filesEnum = files.elements();
                    while (filesEnum.hasMoreElements()) {
                        File file = (File) filesEnum.nextElement();
                        URL url = file.toURL();
                        System.out.println("Indexing: " + url);
                        builder.indexDocument(url, "xml");
                    }
                    builder.setPrefixTranslator(null);
                }
            }
        }

        /**
         * Sorts the <code>Document</code> elements of the configuration into
         * the add / refresh / remove sets according to their "action"
         * attribute. A later action on the same file overrides an earlier
         * conflicting one; "add" does not displace a pending "refresh".
         *
         * @param configElement element whose Document descendants are read
         */
        private void prepareSets(Element configElement) {
            // a simplification for now
            NodeList list = configElement.getElementsByTagName("Document");
            for (int i = 0; i < list.getLength(); i++) {
                Element docEl = (Element) list.item(i);
                String docName = docEl.getAttribute("file");
                String action = docEl.getAttribute("action");
                if (action.equals("add")) {
                    // refresh is more general than add
                    if (_toRefresh.get(docName) == null) {
                        _toAdd.put(docName, docName);
                    }
                    reportOverride(_toRemove.remove(docName), "remove/add ", docName);
                }
                else if (action.equals("remove")) {
                    _toRemove.put(docName, docName);
                    reportOverride(_toAdd.remove(docName), "add/remove ", docName);
                    reportOverride(_toRefresh.remove(docName), "refresh/remove ", docName);
                }
                else if (action.equals("refresh")) {
                    _toRefresh.put(docName, docName);
                    reportOverride(_toRemove.remove(docName), "remove/refresh ", docName);
                    reportOverride(_toAdd.remove(docName), "add/refresh ", docName);
                }
            }
        }

        /**
         * Prints a diagnostic when an earlier action was overridden.
         *
         * <p>The removal itself is performed by the caller unconditionally;
         * only the message depends on the verbose flag, so the sets stay
         * consistent even when diagnostics are off.
         *
         * @param removed value returned by the set's remove call, or null if
         *        nothing was overridden
         * @param label   message prefix naming the overridden/new actions
         * @param docName document the actions refer to
         */
        private void reportOverride(Object removed, String label, String docName) {
            if (removed != null && _verbose) {
                System.out.println(label + docName);
            }
        }

        /**
         * Passes the prepared remove / refresh / add sets to the builder.
         *
         * @param builder builder to update
         * @throws Exception propagated from the builder
         */
        private void updateIndex(XmlIndexBuilder builder) throws Exception {
            builder.updateIndex(_toRemove, _toRefresh, _toAdd);
        }
    }

    /*
    private void process(String[] args) {
    try {
    for (int i = 0; i < args.length ; i++) {
    if (args[i].equals("-config"))
    processConfiguration(args[++i]);
    }
    }
    catch (SAXParseException e) {
    System.err.println(e.getMessage());
    System.err.println("in LINE " + e.getLineNumber());
    e.printStackTrace();
    }
    catch (Exception e) {
    e.printStackTrace();
    }
    }
    ******/

    /**
     * Processes one configuration element with a fresh
     * {@link ConfigurationData}.
     *
     * @param config configuration element
     * @throws Exception propagated from processing
     */
    private void configure(Element config) throws Exception {
        ConfigurationData configuration = new ConfigurationData();
        configuration.process(config);
    }

    /**
     * Collects, recursively, all files below <code>dirName</code> whose name
     * ends with <code>extension</code>.
     *
     * @param dirName   directory to search
     * @param extension required file-name suffix
     * @return Vector of {@link File}; empty if the directory does not exist
     *         or holds no matching file
     */
    private Vector findXmlFiles(String dirName, String extension) {
        Vector result = new Vector();
        findXmlFiles(new File(dirName), extension, result);
        return result;
    }

    /**
     * Recursive worker: appends matching files below <code>directory</code>
     * to <code>result</code>.
     *
     * @param directory directory to search; ignored if it is not an
     *        existing directory
     * @param extension required file-name suffix
     * @param result    Vector receiving the matching {@link File} objects
     */
    private void findXmlFiles(File directory, String extension, Vector result) {
        if (!(directory.exists() && directory.isDirectory())) {
            return;
        }
        String[] files = directory.list();
        if (files == null) {
            // File.list() returns null when the directory cannot be read
            // (I/O error, missing permission); skip it instead of failing
            // with a NullPointerException.
            if (_verbose) {
                System.err.println("Cannot list directory: " + directory);
            }
            return;
        }
        for (int i = 0; i < files.length; i++) {
            File file = new File(directory, files[i]);
            if (file.isDirectory()) {
                findXmlFiles(file, extension, result);
            }
            else if (file.isFile() && files[i].endsWith(extension)) {
                result.addElement(file);
            }
        }
    }

    /* index named directory recursively */
    /*
    private void indexDirectory(String dirName) throws Exception {
    Vector files = findXmlFiles(dirName);
    if (files.size() > 0) {
    XmlIndexBuilder builder = new XmlIndexBuilder(_indexName);
    builder.clearIndex();
    if (builder.init()) {
    builder.addPrefixTranslation("file:/home/jacek/", "#A#");
    builder.addPrefixTranslation("file:/files8/jacek/docs/", "#B#");
    Enumeration filesEnum = files.elements();
    while (filesEnum.hasMoreElements()) {
    File file = (File)filesEnum.nextElement();
    URL url = file.toURL();
    System.out.println(url);
    builder.indexDocument(url, "xml");
    }
    builder.close();
    }
    }
    }
    *******/
    /*
    private void indexFiles(String dirName) throws Exception {
    File directory = new File(dirName);
    if (directory.exists() && directory.isDirectory()) {
    FilenameFilter filter = new FilenameFilter() {
    public boolean accept(File dir, String name) {
    return name.endsWith(".xml");
    }
    };
    String[] xmlFiles = directory.list(filter);
    if (xmlFiles.length > 0) {
    XmlIndexBuilder builder = new XmlIndexBuilder(_indexName);
    builder.clearIndex();
    if (builder.init()) {
    builder.addPrefixTranslation("file:/home/jacek/", "#A#");
    builder.addPrefixTranslation("file:/files8/jacek/docs/", "#B#");
    for (int i = 0; i < xmlFiles.length; i++) {
    File xmlFile = new File(directory, xmlFiles[i]);
    URL url = xmlFile.toURL();
    System.out.println(url);
    builder.indexDocument(url, "xml");
    }
    builder.close();
    }
    }
    }
    else
    System.err.println(dirName + " not a valid directory");
    }
    *********/

    /**
     * Builds or updates an index as described by <code>config</code>.
     *
     * @param config configuration element
     * @throws Exception propagated from processing
     */
    public static void process(Element config) throws Exception {
        (new XmlIndexer()).configure(config);
    }

    /**
     * Command-line entry point. Registers the "star" URL stream handler,
     * obtains the configuration element from the arguments and, if one was
     * found, processes it. Any exception is reported with a stack trace.
     *
     * @param args command-line arguments passed on to
     *        <code>Configuration.configElementFromArgs</code>
     */
    public static void main(String[] args) {
        try {
            final ExtensibleURLStreamHandlerFactory factory =
                new ExtensibleURLStreamHandlerFactory();
            factory.setHandler("star", "com.sun.xmlsearch.util.StarURLStreamHandler");
            URL.setURLStreamHandlerFactory(factory);
            Element config = Configuration.configElementFromArgs(args);
            if (config != null) {
                process(config);
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }
}