Source Code of org.apache.slide.index.LuceneIndexer

/*
 * $Header: /home/cvs/jakarta-slide/src/stores/org/apache/slide/index/LuceneIndexer.java,v 1.2 2004/07/11 10:12:50 unico Exp $
 * $Revision: 1.2 $
 * $Date: 2004/07/11 10:12:50 $
 *
 * ====================================================================
 *
 * Copyright 1999-2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */




package org.apache.slide.index;


import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;


import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;


/**
 * Not intended for production.
 * Bug: running in test mode everything works fine,
 * in slide context no index is deleted when deleting resources.
 *
 */
public class LuceneIndexer
{
    private static final String DOC_ID = "documentId";
    
    
    private String indexDb;
    
    
    public LuceneIndexer (String indexDb)
    {
        this.indexDb = indexDb;
    }
    
    
    /**
     * removes an index for a docId
     * TODO: works in testmode (running Main), deletes nothing in slide context
     *
     * @param    docId               a  String
     *
     * @throws   IOException
     *
     */
    public void removeIndex (String docId) throws IOException
    {
        Directory directory = FSDirectory.getDirectory (indexDb, false);
        IndexReader reader = IndexReader.open(directory);
        
        Term deleteTerm = new Term(DOC_ID, docId);
        reader.delete(deleteTerm);
        reader.close();
        directory.close();
    }
    
    /**
     * Method createIndex
     *
     * @param    docId               a  String
     *
     * @throws   IOException
     * @throws   Exception
     *
     */
    public void index (String docId, Reader reader)
        throws Exception
    {
        IndexWriter writer =
            new IndexWriter(indexDb, new StandardAnalyzer(), false);
        
//      reader = new FileReader (docId);
        
        Document doc = new Document();
        doc.add (Field.Text ("contents", reader));
        Field field = new Field(DOC_ID, docId, true, true, true);
        doc.add(field);
        writer.addDocument(doc);
        writer.optimize();
        writer.close();
    }
    
    private static Reader getReader (String file) throws IOException
    {
        FileReader reader = new FileReader (file);
        return reader;
    }
    
    private Set contains(String stringToFind) throws Exception
    {
        Set resultSet = new HashSet();
        Searcher searcher = new IndexSearcher (indexDb);
        Analyzer analyzer = new StandardAnalyzer();
        
        Query query = QueryParser.parse(stringToFind, "contents", analyzer);
        
        Hits hits = searcher.search(query);
        
        int noOfHits = hits.length();
        for (int i = 0; i < noOfHits; i++)
        {
            Document doc = hits.doc(i);
            String docId = doc.get(DOC_ID);
            resultSet.add(docId);
        }
        searcher.close();
        return resultSet;
    }
    
    
    /**
     * Test. To run, adopt INDEX_DB and put two text files in current directory,
     * otto.txt and fritz.txt containing the strings
     * "hallo otto" and "hallo fritz"
     *
     * @param    args                a  String[]
     *
     * @throws   Exception
     *
     */
    public static void main(String[] args) throws Exception
    {
        String INDEX_DB ="D:\\projects\\tmp\\index";
        
        LuceneIndexer indexer = new LuceneIndexer (INDEX_DB);
        
        IndexWriter writer =
            new IndexWriter(INDEX_DB, new StandardAnalyzer(), true);
        
        writer.close();
            
        String ottoFile = "otto.txt";
        new LuceneIndexer (INDEX_DB).index (ottoFile, getReader(ottoFile));
        
        System.out.println("expect one element otto.txt");
        displayResult(indexer.contains("otto"));
        
        new LuceneIndexer (INDEX_DB).index("fritz.txt", getReader("fritz.txt"));
        System.out.println("expect fritz.txt");
        displayResult(indexer.contains("fritz"));
        
        System.out.println("expect fritz.txt and otto.txt");
        displayResult(indexer.contains("Hallo"));
        
        new LuceneIndexer (INDEX_DB).removeIndex ("otto.txt");
        
        System.out.println("expect fritz.txt");
        displayResult (indexer.contains("Hallo"));
        
        System.out.println("expect null");
        displayResult (indexer.contains("otto"));
        
        new LuceneIndexer (INDEX_DB).removeIndex ("fritz.txt");
        
        System.out.println("expect null");
        displayResult(indexer.contains("fritz"));
    }
    
    private static void displayResult(Set result)
    {
        for (Iterator iter = result.iterator(); iter.hasNext();)
        {
            String element = (String) iter.next();
            System.out.println(element);
        }
        
    }
}
Source Code of org.apache.slide.index.LuceneIndexer

Related Classes of org.apache.slide.index.LuceneIndexer