/*
* $Header: /home/cvs/jakarta-slide/src/stores/org/apache/slide/index/LuceneIndexer.java,v 1.2 2004/07/11 10:12:50 unico Exp $
* $Revision: 1.2 $
* $Date: 2004/07/11 10:12:50 $
*
* ====================================================================
*
* Copyright 1999-2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.slide.index;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Minimal Lucene-backed full text indexer. Not intended for production.
 *
 * <p>Each indexed document carries two fields: <code>contents</code> (the
 * tokenized text read from a {@link Reader}) and <code>documentId</code>
 * (the caller supplied id). The id is indexed as a single, untokenized term
 * so that {@link #removeIndex(String)}, which deletes by exact term, can
 * match it.
 *
 * <p>NOTE(review): the id used to be run through the analyzer, which is the
 * presumed reason why deletions worked in test mode (simple lower case file
 * names) but not in the slide context - confirm against a slide deployment.
 * Documents written with the old tokenized id need to be re-indexed.
 */
public class LuceneIndexer
{
    /** Name of the field holding the document id. */
    private static final String DOC_ID = "documentId";

    /** Name of the field holding the searchable text. */
    private static final String CONTENTS = "contents";

    /** Index location used by {@link #main(String[])} when no argument is given. */
    private static final String DEFAULT_INDEX_DB = "D:\\projects\\tmp\\index";

    /** File system path of the Lucene index directory. */
    private String indexDb;

    /**
     * Creates an indexer working on the given index location. The index
     * itself is neither opened nor created here.
     *
     * @param indexDb path of the Lucene index directory
     */
    public LuceneIndexer (String indexDb)
    {
        this.indexDb = indexDb;
    }

    /**
     * Removes all index entries whose document id equals <code>docId</code>.
     * The match is an exact term match, no analysis is applied to the id.
     *
     * @param docId id that was passed to {@link #index(String, Reader)}
     *
     * @throws IOException if the index cannot be opened or modified
     */
    public void removeIndex (String docId) throws IOException
    {
        Directory directory = FSDirectory.getDirectory (indexDb, false);
        try
        {
            IndexReader reader = IndexReader.open(directory);
            try
            {
                reader.delete(new Term(DOC_ID, docId));
            }
            finally
            {
                // Release the reader even when the delete fails.
                reader.close();
            }
        }
        finally
        {
            directory.close();
        }
    }

    /**
     * Adds a document to the index. The index is opened with the create
     * flag set to <code>false</code>, so it has to exist already.
     *
     * @param docId  id under which the document can later be found and removed
     * @param reader source of the text to be indexed
     *
     * @throws Exception if the index cannot be opened or written
     */
    public void index (String docId, Reader reader)
        throws Exception
    {
        IndexWriter writer =
            new IndexWriter(indexDb, new StandardAnalyzer(), false);
        try
        {
            Document doc = new Document();
            doc.add (Field.Text (CONTENTS, reader));
            // stored + indexed but NOT tokenized: the id must stay one exact
            // term, otherwise removeIndex() cannot address it by Term.
            doc.add (new Field(DOC_ID, docId, true, true, false));
            writer.addDocument(doc);
            writer.optimize();
        }
        finally
        {
            // Close the writer even when adding the document fails.
            writer.close();
        }
    }

    /**
     * Opens a file for reading.
     *
     * @param file name of the file
     *
     * @return a reader on the file, to be closed by the caller
     *
     * @throws IOException if the file cannot be opened
     */
    private static Reader getReader (String file) throws IOException
    {
        return new FileReader (file);
    }

    /**
     * Searches the <code>contents</code> field and collects the ids of all
     * matching documents.
     *
     * @param stringToFind query string in Lucene query parser syntax
     *
     * @return set of document ids (Strings), empty if nothing matched
     *
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    private Set contains(String stringToFind) throws Exception
    {
        Set resultSet = new HashSet();
        Searcher searcher = new IndexSearcher (indexDb);
        try
        {
            Analyzer analyzer = new StandardAnalyzer();
            Query query = QueryParser.parse(stringToFind, CONTENTS, analyzer);
            Hits hits = searcher.search(query);
            int noOfHits = hits.length();
            for (int i = 0; i < noOfHits; i++)
            {
                resultSet.add(hits.doc(i).get(DOC_ID));
            }
        }
        finally
        {
            // Close the searcher even when parsing or searching fails.
            searcher.close();
        }
        return resultSet;
    }

    /**
     * Test. To run, put two text files in the current directory,
     * otto.txt and fritz.txt containing the strings
     * "hallo otto" and "hallo fritz". The index location may be given as
     * first argument; without arguments a built-in default path is used.
     * Any existing index at that location is replaced.
     *
     * @param args optional: args[0] is the path of the index directory
     *
     * @throws Exception on any indexing or search failure
     */
    public static void main(String[] args) throws Exception
    {
        String indexDb = args.length > 0 ? args[0] : DEFAULT_INDEX_DB;
        LuceneIndexer indexer = new LuceneIndexer (indexDb);

        // Start from an empty index (create flag = true).
        IndexWriter writer =
            new IndexWriter(indexDb, new StandardAnalyzer(), true);
        writer.close();

        indexFile (indexer, "otto.txt");
        System.out.println("expect one element otto.txt");
        displayResult(indexer.contains("otto"));

        indexFile (indexer, "fritz.txt");
        System.out.println("expect fritz.txt");
        displayResult(indexer.contains("fritz"));
        System.out.println("expect fritz.txt and otto.txt");
        displayResult(indexer.contains("Hallo"));

        indexer.removeIndex ("otto.txt");
        System.out.println("expect fritz.txt");
        displayResult (indexer.contains("Hallo"));
        System.out.println("expect null");
        displayResult (indexer.contains("otto"));

        indexer.removeIndex ("fritz.txt");
        System.out.println("expect null");
        displayResult(indexer.contains("fritz"));
    }

    /**
     * Indexes one file under its own name and makes sure the reader opened
     * for it is closed afterwards.
     *
     * @param indexer indexer to add the file to
     * @param file    name of the file, also used as document id
     *
     * @throws Exception if the file cannot be read or indexed
     */
    private static void indexFile (LuceneIndexer indexer, String file)
        throws Exception
    {
        Reader reader = getReader(file);
        try
        {
            indexer.index (file, reader);
        }
        finally
        {
            // Closing an already closed reader has no effect, so this is
            // safe regardless of what the indexer did with it.
            reader.close();
        }
    }

    /**
     * Prints every element of the given set on its own line; prints nothing
     * for an empty set.
     *
     * @param result set of Strings
     */
    private static void displayResult(Set result)
    {
        for (Iterator iter = result.iterator(); iter.hasNext();)
        {
            String element = (String) iter.next();
            System.out.println(element);
        }
    }
}