Package com.redpiranha.server.input.index

Source Code of com.redpiranha.server.input.index.IndexAgent

package com.redpiranha.server.input.index;

/*
* Copyright (C) 2001- 2004 Paul Browne, http://www.firstpartners.net,
*
* released under terms of the GPL license
* http://www.opensource.org/licenses/gpl-license.php
*
* This product includes software developed by the
* Apache Software Foundation (http://www.apache.org)."
*
*/

import org.apache.log4j.Logger;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Enumeration;
import java.util.*;

import java.io.*;
import com.redpiranha.common.util.data.*;
import com.redpiranha.server.input.index.*;
import com.redpiranha.server.input.index.data.*;
import com.redpiranha.server.input.index.exception.*;
import com.redpiranha.server.input.index.lucene.*;
import com.redpiranha.common.*;

import java.io.*;
import com.redpiranha.server.input.index.data.*;

/**
* Does the Hard work of Indexing
*
* @author Paul Browne , based on sample from Apache Lucene
*/
public class IndexAgent {
    //  Handle to logger
    static Logger log = Logger.getLogger(IndexAgent.class);


    /**
     * Index Name that we are using - this is the Directory the Lucene index will be stored under
     */
    public static final String INDEX_NAME = Constants.INDEX_DIR;

    /**
     * Index the files in the collection , stored under the Ints
     *
     * @param filesToIndex - Collection of Integers of the files to index
     *
     * @throws java.io.IOException - if Lucene cannot index
     * @throws ClassNotFoundException -  if we can't read file from the database
     * @throws IndexException - if anything else goes wrong
     * @throws FinderException - from underlying Meta Data search
     * @throws LocatorException - from underlying Meta Data search
     */
    public void indexFiles(Collection filesToIndex)
        throws java.io.IOException, ClassNotFoundException, IndexException {
       
        //Local Variables
        FileDocument fileEntity = null;
        DataContainer fileMetaData = null;
        Document thisDocument = null;
        int counter = 1;
        Iterator loopList = filesToIndex.iterator();

        //for debugging code
        Enumeration resultKeywords = null;
        Field thisField;

        //Handle to the Index

        IndexWriter writer =
            new IndexWriter(INDEX_NAME, new RpAnalyzer(), false);
        //false , uses existing index 

      

        //Debug info
        if ((log.isDebugEnabled()) && (loopList == null)) {

            log.debug("List of files to index was null");

        }

        //      Loop and get index the files on the database
        while (loopList.hasNext()) {
           
            //Get the next set of meta data
            fileEntity =(File) loopList.next();
           

            log.debug("Indexed File Meta Data:\n" + fileMetaData);
            log.debug("End Indexed File Meta Data");

            thisDocument = RpDocumentBuilder.createDocument(fileMetaData);
            writer.addDocument(thisDocument);

            //debug code
            log.debug("INDEXED DOCUMENT NUMBER:" + counter);
            counter++;

            //now loop through the keywords we added and log debug them
            if (log.isDebugEnabled()) {

                resultKeywords = thisDocument.fields();

                while (resultKeywords.hasMoreElements()) {
                    thisField = (Field) (resultKeywords.nextElement());
                    log.debug(
                        "indexed: contains field name:"
                            + thisField.name()
                            + " isIndexed:"
                            + thisField.isIndexed()
                            + " isStored:"
                            + thisField.isStored()
                            + " isTokenized:"
                            + thisField.isTokenized());
                }
            }

        }

        //Close off the writer
        writer.optimize();
        writer.close();

        //Log the time it took
    }

    /**
     * Get a set of Meta Data for this collection of file ID
     *
     * @param fileIds - collection of File Id's that we want the meta information for
     *
     * @return Enumeration of Item Entity Beans
     *
     * @throws LocatorException - if we cannot find the beans
     * @throws FinderException - if we cannot find the beans
     */
    private Enumeration getDocumentMetaData(Collection fileIds)
    //                                throws LocatorException, FinderException
    {
        //Local Variables
        //   ItemEntityHome itemHome = LocalServiceLocator.getInstance().getLocalItemEntity();

        Enumeration itemEnumeration = null;
        //itemHome.findByMultipleFileId(fileIds);

        return itemEnumeration;
    }

    /**
     * Clears any previous index and generates the index for <Strong>all</Strong> the files stored on the DB
     *
     * @throws java.io.IOException - if Lucene cannot index
     * @throws ClassNotFoundException -  if we can't read file from the database
     * @throws IndexException - if anything else goes wrong
     */
    public void resetIndex()
        throws java.io.IOException, ClassNotFoundException, IndexException {
       
        //New Write clears index
        log.debug("creating new index in:" + INDEX_NAME);
        IndexWriter writer =
            new IndexWriter(INDEX_NAME, new RpAnalyzer(), true);
        //true creates new index 

        //Close off the writer
        writer.optimize();
        writer.close();
    }
}
TOP

Related Classes of com.redpiranha.server.input.index.IndexAgent

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.