/**
* $Revision$
* $Date$
*
* Copyright (C) 2008 Jive Software. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.jivesoftware.openfire.archive;
import java.io.File;
import java.io.FileWriter;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.Writer;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TimerTask;
import java.util.TreeSet;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexModifier;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.dom4j.DocumentFactory;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.XMLWriter;
import org.jivesoftware.database.DbConnectionManager;
import org.jivesoftware.openfire.reporting.util.TaskEngine;
import org.jivesoftware.util.JiveConstants;
import org.jivesoftware.util.JiveGlobals;
import org.jivesoftware.util.XMLProperties;
import org.picocontainer.Startable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xmpp.packet.JID;
/**
* Indexes archived conversations. If conversation archiving is not enabled,
* this class does nothing. The search index is maintained in the <tt>monitoring/search</tt>
* directory of the Openfire home directory. It's automatically updated with the latest
* conversation content as long as conversation archiving is enabled. The index update
* interval is controlled by the Jive property "conversation.search.updateInterval" and
* the default value is 15 minutes.
*
* @see ArchiveSearcher
* @author Matt Tucker
*/
public class ArchiveIndexer implements Startable {

    private static final Logger Log = LoggerFactory.getLogger(ArchiveIndexer.class);

    private static final String ALL_CONVERSATIONS =
        "SELECT conversationID, isExternal FROM ofConversation";
    private static final String NEW_CONVERSATIONS =
        "SELECT DISTINCT conversationID FROM ofMessageArchive WHERE sentDate > ?";
    private static final String CONVERSATION_METADATA =
        "SELECT isExternal FROM ofConversation WHERE conversationID=?";
    private static final String CONVERSATION_MESSAGES =
        "SELECT conversationID, sentDate, fromJID, toJID, body FROM ofMessageArchive " +
        "WHERE conversationID IN ? ORDER BY conversationID";

    /**
     * Maximum number of conversations whose messages are loaded by a single query.
     */
    private static final int OP_SIZE = 250;

    private File searchDir;
    private TaskEngine taskEngine;
    private ConversationManager conversationManager;
    private XMLProperties indexProperties;
    private Directory directory;
    private IndexSearcher searcher;

    /**
     * Serializes index writers: incremental updates and full rebuilds both hold this
     * lock while they have an IndexModifier open.
     */
    private final Lock writerLock = new ReentrantLock(true);

    // The following flags are written by one thread (caller of stop()/rebuildIndex(), or the
    // rebuild task) and read by others, so they are volatile to guarantee visibility.
    private volatile boolean stopped = false;
    private volatile boolean rebuildInProgress = false;
    private volatile RebuildFuture rebuildFuture;

    /**
     * Date of the newest message indexed so far, as stored in the "lastModified" index
     * property; 0 means that the index has never been built.
     */
    private long lastModified = 0;
    private TimerTask indexUpdater;

    /**
     * Constructs a new archive indexer.
     *
     * @param conversationManager a ConversationManager instance.
     * @param taskEngine a task engine instance.
     */
    public ArchiveIndexer(ConversationManager conversationManager, TaskEngine taskEngine) {
        this.conversationManager = conversationManager;
        this.taskEngine = taskEngine;
    }

    /**
     * Opens (or creates) the search index in the <tt>monitoring/search</tt> directory of the
     * Openfire home directory, triggers a full rebuild if the index is new or was never
     * updated, and schedules the periodic index update task. If the index directory or its
     * properties file cannot be opened, the error is logged and indexing stays disabled.
     */
    public void start() {
        searchDir = new File(JiveGlobals.getHomeDirectory() +
                File.separator + "monitoring" + File.separator + "search");
        if (!searchDir.exists() && !searchDir.mkdirs()) {
            Log.warn("Unable to create search index directory: " + searchDir);
        }
        boolean indexCreated = false;
        try {
            loadPropertiesFile(searchDir);
            // If the index already exists, use it.
            if (IndexReader.indexExists(searchDir)) {
                directory = FSDirectory.getDirectory(searchDir, false);
            }
            // Otherwise, create a new index.
            else {
                directory = FSDirectory.getDirectory(searchDir, true);
                indexCreated = true;
            }
        }
        catch (IOException ioe) {
            Log.error(ioe.getMessage(), ioe);
        }
        // Without a directory and properties nothing below can work; bail out instead of
        // failing with a NullPointerException further down.
        if (directory == null || indexProperties == null) {
            Log.error("Archive search index could not be initialized; indexing is disabled.");
            return;
        }

        // Force the directory unlocked if it's locked (due to non-clean app shut-down,
        // for example).
        try {
            if (IndexReader.isLocked(directory)) {
                Log.warn("Archiving search index was locked, probably due to non-clean " +
                        "application shutdown.");
                IndexReader.unlock(directory);
            }
        }
        catch (IOException ioe) {
            Log.error(ioe.getMessage(), ioe);
        }

        String modified = indexProperties.getProperty("lastModified");
        if (modified != null) {
            try {
                lastModified = Long.parseLong(modified);
            }
            catch (NumberFormatException nfe) {
                // Ignore: lastModified stays 0, which triggers a full rebuild below.
            }
        }
        // If the index has never been updated, build it from scratch.
        if (lastModified == 0 || indexCreated) {
            taskEngine.submit(new Runnable() {
                public void run() {
                    rebuildIndex();
                }
            });
        }

        indexUpdater = new TimerTask() {
            @Override
            public void run() {
                updateIndex();
            }
        };
        int updateInterval = JiveGlobals.getIntProperty("conversation.search.updateInterval", 15);
        taskEngine.scheduleAtFixedRate(indexUpdater, JiveConstants.MINUTE * 5,
                JiveConstants.MINUTE * updateInterval);
    }

    /**
     * Stops the indexer: cancels the periodic update task, closes the searcher and the
     * index directory and releases all references. Safe to call even if {@link #start()}
     * failed or was never called.
     */
    public void stop() {
        stopped = true;
        if (indexUpdater != null) {
            indexUpdater.cancel();
        }
        // getSearcher() is synchronized on this instance, so close the searcher under the
        // same monitor.
        synchronized (this) {
            if (searcher != null) {
                try {
                    searcher.close();
                }
                catch (Exception e) {
                    Log.error(e.getMessage(), e);
                }
                searcher = null;
            }
        }
        if (directory != null) {
            try {
                directory.close();
            }
            catch (Exception e) {
                Log.error(e.getMessage(), e);
            }
        }
        directory = null;
        indexProperties = null;
        conversationManager = null;
        searchDir = null;
        rebuildFuture = null;
    }

    /**
     * Returns the total size of the search index (in bytes).
     *
     * @return the total size of the search index (in bytes), or 0 if the index directory
     *      is not available.
     */
    public long getIndexSize() {
        File dir = searchDir;
        if (dir == null) {
            // Not started yet, or already stopped.
            return 0;
        }
        File[] files = dir.listFiles(new FilenameFilter() {
            public boolean accept(File parent, String name) {
                // Ignore the index properties file since it's not part of the index.
                return !name.equals("indexprops.xml");
            }
        });
        if (files == null) {
            // Search folder does not exist so size of index is 0
            return 0;
        }
        long size = 0;
        for (File file : files) {
            size += file.length();
        }
        return size;
    }

    /**
     * Updates the search index with all new conversation data since the last index update.
     * Does nothing if the service is stopped, archiving is disabled, a rebuild is in
     * progress or no conversation received messages since the last update.
     */
    public void updateIndex() {
        // Immediately return if the service has been stopped or never started properly.
        if (stopped || directory == null) {
            return;
        }
        // Do nothing if archiving is disabled.
        if (!conversationManager.isArchivingEnabled()) {
            return;
        }
        // If we're currently rebuilding the index, return.
        if (rebuildInProgress) {
            return;
        }

        writerLock.lock();
        IndexModifier writer = null;
        try {
            List<Long> conversationIDs = loadNewConversationIDs();
            if (conversationIDs.isEmpty()) {
                // Nothing new: avoid opening a writer and optimizing an unchanged index.
                return;
            }
            writer = new IndexModifier(directory, new StandardAnalyzer(), false);

            // Delete any conversations found -- they may have already been indexed, but
            // updated since then.
            for (long conversationID : conversationIDs) {
                writer.deleteDocuments(new Term("conversationID", Long.toString(conversationID)));
            }

            // Load meta-data for each conversation, then index all the new conversations.
            Map<Long, Boolean> externalMetaData = loadExternalMetaData(conversationIDs);
            long newestDate = indexConversations(conversationIDs, externalMetaData, writer, false);
            writer.optimize();

            // Done indexing so store a last modified date.
            saveLastModified(newestDate);
        }
        catch (IOException ioe) {
            Log.error(ioe.getMessage(), ioe);
        }
        finally {
            closeWriter(writer);
            writerLock.unlock();
        }
    }

    /**
     * Rebuilds the search index with all archived conversation data. This method returns
     * a Future that represents the status of the index rebuild process (also available
     * via {@link #getIndexRebuildProgress()}). The integer value
     * (values 0 through 100) represents the percentage of work done. If message archiving
     * is disabled, the service is stopped or a rebuild is already running, this method
     * will return <tt>null</tt>.
     *
     * @return a Future to indicate the status of rebuilding the index or <tt>null</tt> if
     *      rebuilding the index is not possible.
     */
    public synchronized Future<Integer> rebuildIndex() {
        // Immediately return if the service has been stopped or never started properly.
        if (stopped || directory == null) {
            return null;
        }
        // If a rebuild is already happening, return.
        if (rebuildInProgress) {
            return null;
        }
        // Do nothing if archiving is disabled. This check must come before the in-progress
        // flag is raised, otherwise the flag would stay set forever.
        if (!conversationManager.isArchivingEnabled()) {
            return null;
        }
        rebuildInProgress = true;

        // Create a future to track the index rebuild progress. A local reference is kept
        // because the task clears the field when it finishes, possibly before we return.
        final RebuildFuture future = new RebuildFuture();
        rebuildFuture = future;

        // Create a runnable that will perform the actual rebuild work.
        Runnable rebuildTask = new Runnable() {
            public void run() {
                try {
                    performRebuild();
                }
                finally {
                    // Done rebuilding the index (successfully or not), so mark the future
                    // as complete and reset state.
                    future.setPercentageDone(100);
                    rebuildFuture = null;
                    rebuildInProgress = false;
                }
            }
        };
        try {
            taskEngine.submit(rebuildTask);
        }
        catch (RuntimeException e) {
            // The task will never run, so it cannot reset the state itself.
            rebuildFuture = null;
            rebuildInProgress = false;
            throw e;
        }
        return future;
    }

    /**
     * Returns a Future representing the status of an index rebuild operation. This is the
     * same Future returned by the {@link #rebuildIndex()} method; access is provided via
     * this method as a convenience. If the index is not currently being rebuilt, this method
     * will return <tt>null</tt>.
     *
     * @return a Future that represents the index rebuild status or <tt>null</tt> if the
     *      index is not being rebuilt.
     */
    public Future<Integer> getIndexRebuildProgress() {
        return rebuildFuture;
    }

    /**
     * Loads the IDs of all conversations that received a message after the last index
     * update. Database errors are logged and result in a (possibly partial) list.
     *
     * @return the IDs of the conversations that need to be (re-)indexed.
     */
    private List<Long> loadNewConversationIDs() {
        List<Long> conversationIDs = new ArrayList<Long>();
        Connection con = null;
        PreparedStatement pstmt = null;
        ResultSet rs = null;
        try {
            con = DbConnectionManager.getConnection();
            pstmt = con.prepareStatement(NEW_CONVERSATIONS);
            pstmt.setLong(1, lastModified);
            rs = pstmt.executeQuery();
            while (rs.next()) {
                conversationIDs.add(rs.getLong(1));
            }
        }
        catch (SQLException sqle) {
            Log.error(sqle.getMessage(), sqle);
        }
        finally {
            DbConnectionManager.closeConnection(rs, pstmt, con);
        }
        return conversationIDs;
    }

    /**
     * Loads the "external" flag of each of the given conversations. Conversations whose
     * meta-data cannot be loaded are simply absent from the returned map.
     *
     * @param conversationIDs the ID's of the conversations to load meta-data for.
     * @return a map of conversation ID to its "external" flag.
     */
    private Map<Long, Boolean> loadExternalMetaData(List<Long> conversationIDs) {
        Map<Long, Boolean> externalMetaData = new HashMap<Long, Boolean>();
        for (long conversationID : conversationIDs) {
            Connection con = null;
            PreparedStatement pstmt = null;
            ResultSet rs = null;
            try {
                con = DbConnectionManager.getConnection();
                pstmt = con.prepareStatement(CONVERSATION_METADATA);
                pstmt.setLong(1, conversationID);
                rs = pstmt.executeQuery();
                while (rs.next()) {
                    externalMetaData.put(conversationID, rs.getInt(1) == 1);
                }
            }
            catch (SQLException sqle) {
                Log.error(sqle.getMessage(), sqle);
            }
            finally {
                DbConnectionManager.closeConnection(rs, pstmt, con);
            }
        }
        return externalMetaData;
    }

    /**
     * Performs the actual index rebuild: loads all conversations and re-creates the index
     * from them. Runs on a task engine thread; the caller resets the rebuild state.
     */
    private void performRebuild() {
        List<Long> conversationIDs = new ArrayList<Long>();
        Map<Long, Boolean> externalMetaData = new HashMap<Long, Boolean>();
        Connection con = null;
        PreparedStatement pstmt = null;
        ResultSet rs = null;
        try {
            con = DbConnectionManager.getConnection();
            pstmt = con.prepareStatement(ALL_CONVERSATIONS);
            rs = pstmt.executeQuery();
            while (rs.next()) {
                long conversationID = rs.getLong(1);
                conversationIDs.add(conversationID);
                externalMetaData.put(conversationID, rs.getInt(2) == 1);
            }
        }
        catch (SQLException sqle) {
            Log.error(sqle.getMessage(), sqle);
        }
        finally {
            DbConnectionManager.closeConnection(rs, pstmt, con);
        }

        if (conversationIDs.isEmpty()) {
            return;
        }

        // Index the conversations.
        writerLock.lock();
        IndexModifier writer = null;
        try {
            // "true" re-creates the index, discarding any existing content.
            writer = new IndexModifier(directory, new StandardAnalyzer(), true);
            long newestDate = indexConversations(conversationIDs, externalMetaData,
                    writer, true);
            writer.optimize();
            // Done indexing so store a last modified date.
            saveLastModified(newestDate);
        }
        catch (IOException ioe) {
            Log.error(ioe.getMessage(), ioe);
        }
        finally {
            closeWriter(writer);
            writerLock.unlock();
        }
    }

    /**
     * Records the date of the newest indexed message, both in memory and in the index
     * properties file. Does nothing if no message was indexed.
     *
     * @param newestDate the date of the newest message indexed, or -1 if none.
     */
    private void saveLastModified(long newestDate) {
        if (newestDate == -1) {
            return;
        }
        lastModified = newestDate;
        // stop() may have released the properties while indexing was still running.
        XMLProperties properties = indexProperties;
        if (properties != null) {
            properties.setProperty("lastModified", Long.toString(newestDate));
        }
    }

    /**
     * Closes an index writer, logging (rather than propagating) any failure.
     *
     * @param writer the writer to close; may be <tt>null</tt>.
     */
    private void closeWriter(IndexModifier writer) {
        if (writer == null) {
            return;
        }
        try {
            writer.close();
        }
        catch (Exception e) {
            Log.error(e.getMessage(), e);
        }
    }

    /**
     * Indexes a set of conversations. Each conversation is stored as a single Lucene document
     * by appending message bodies together. The date of the newest message indexed is
     * returned, or -1 if no conversations are indexed.
     *
     * @param conversationIDs the ID's of the conversations to index.
     * @param externalMetaData meta-data about whether each conversation involves a participant on
     *      an external server. Conversations without an entry are indexed as not external.
     * @param writer an IndexModifier to add the documents to.
     * @param indexRebuild true if this is an index rebuild operation.
     * @return the date of the newest message archived.
     * @throws IOException if writing to the index fails.
     */
    private long indexConversations(List<Long> conversationIDs, Map<Long, Boolean> externalMetaData,
            IndexModifier writer, boolean indexRebuild) throws IOException
    {
        if (conversationIDs.isEmpty()) {
            return -1;
        }
        final int total = conversationIDs.size();
        // Keep track of how many conversations we index for index rebuild stats.
        int indexedConversations = 0;
        long newestDate = -1;

        // Index OP_SIZE items at a time; round up so that the last, partial batch is
        // processed too.
        final int batches = (total + OP_SIZE - 1) / OP_SIZE;
        for (int i = 0; i < batches; i++) {
            int start = i * OP_SIZE;
            int end = Math.min(start + OP_SIZE, total);

            // Build the " (id1, id2, ...)" list for the IN clause. The values are numeric
            // IDs read from the database, so inlining them is safe.
            StringBuilder inSQL = new StringBuilder(" (");
            inSQL.append(conversationIDs.get(start));
            for (int j = start + 1; j < end; j++) {
                inSQL.append(", ").append(conversationIDs.get(j));
            }
            inSQL.append(")");

            // Get the messages.
            Connection con = null;
            PreparedStatement pstmt = null;
            ResultSet rs = null;
            try {
                con = DbConnectionManager.getConnection();
                pstmt = con.prepareStatement(CONVERSATION_MESSAGES.replace("?", inSQL.toString()));
                rs = pstmt.executeQuery();
                long conversationID = -1;
                long date = -1;
                Set<String> jids = null;
                StringBuilder text = null;
                // Loop through each message. Each conversation is a single document. So, as
                // we find each conversation we save off the last chunk of content as a document.
                while (rs.next()) {
                    long id = rs.getLong(1);
                    if (id != conversationID) {
                        if (conversationID != -1) {
                            // Index the previously defined doc.
                            indexDocument(writer, conversationID,
                                    isExternal(externalMetaData, conversationID),
                                    date, jids, text.toString());
                        }
                        // Reset the variables to index the next conversation.
                        conversationID = id;
                        date = rs.getLong(2);
                        jids = new TreeSet<String>();
                        // Get the JID's. Each JID may be stored in full format. We convert
                        // to bare JID for indexing so that searching is possible.
                        jids.add(new JID(rs.getString(3)).toBareJID());
                        jids.add(new JID(rs.getString(4)).toBareJID());
                        text = new StringBuilder();
                    }
                    // Make sure that we record the earliest date of the conversation start
                    // for consistency.
                    long msgDate = rs.getLong(2);
                    if (msgDate < date) {
                        date = msgDate;
                    }
                    // See if this is the newest message found so far.
                    if (msgDate > newestDate) {
                        newestDate = msgDate;
                    }
                    // Add the body of the current message to the buffer. A missing body is
                    // skipped so that the literal text "null" does not end up in the index.
                    String body = DbConnectionManager.getLargeTextField(rs, 5);
                    if (body != null) {
                        text.append(body).append("\n");
                    }
                }
                // Finally, index the last document found.
                if (conversationID != -1) {
                    indexDocument(writer, conversationID,
                            isExternal(externalMetaData, conversationID),
                            date, jids, text.toString());
                }
                // If this is an index rebuild, we need to track the percentage done.
                if (indexRebuild) {
                    indexedConversations += end - start;
                    // stop() may clear the field while the rebuild is still running.
                    RebuildFuture future = rebuildFuture;
                    if (future != null) {
                        future.setPercentageDone((int) (indexedConversations * 100L / total));
                    }
                }
            }
            catch (SQLException sqle) {
                Log.error(sqle.getMessage(), sqle);
            }
            finally {
                DbConnectionManager.closeConnection(rs, pstmt, con);
            }
        }
        return newestDate;
    }

    /**
     * Looks up the "external" flag of a conversation without risking a
     * NullPointerException when no meta-data was loaded for it.
     *
     * @param externalMetaData the loaded meta-data.
     * @param conversationID the ID of the conversation.
     * @return true if the conversation is flagged as external; false if it is not, or if
     *      no meta-data is available for it.
     */
    private static boolean isExternal(Map<Long, Boolean> externalMetaData, long conversationID) {
        Boolean external = externalMetaData.get(conversationID);
        return external != null && external.booleanValue();
    }

    /**
     * Indexes a single conversation.
     *
     * @param writer the index modifier.
     * @param conversationID the ID of the conversation to index.
     * @param external true if the conversation has a participant from an external server.
     * @param date the date the conversation was started.
     * @param jids the JIDs of the users in the conversation.
     * @param text the full text of the conversation.
     * @throws IOException if an IOException occurs.
     */
    private void indexDocument(IndexModifier writer, long conversationID, boolean external,
            long date, Set<String> jids, String text) throws IOException
    {
        Document document = new Document();
        document.add(new Field("conversationID", String.valueOf(conversationID),
                Field.Store.YES, Field.Index.UN_TOKENIZED));
        document.add(new Field("external", String.valueOf(external),
                Field.Store.YES, Field.Index.UN_TOKENIZED));
        document.add(new Field("date", DateTools.timeToString(date, DateTools.Resolution.DAY),
                Field.Store.YES, Field.Index.UN_TOKENIZED));
        for (String jid : jids) {
            document.add(new Field("jid", jid, Field.Store.YES, Field.Index.TOKENIZED));
        }
        // The conversation text is searchable but not stored in the index.
        document.add(new Field("text", text, Field.Store.NO, Field.Index.TOKENIZED));
        writer.addDocument(document);
    }

    /**
     * Returns an IndexSearcher to search the archive index. The searcher is cached and
     * re-opened whenever the index has changed since it was created.
     *
     * @return an IndexSearcher.
     * @throws IOException if an IOException occurs, or if the index is not available
     *      (the indexer was not started successfully, or has been stopped).
     */
    synchronized IndexSearcher getSearcher() throws IOException {
        if (directory == null) {
            throw new IOException("Archive search index is not available.");
        }
        // If the searcher hasn't been instantiated, create it.
        if (searcher == null) {
            searcher = new IndexSearcher(directory);
        }
        // See if the searcher needs to be closed due to the index being updated.
        else if (!searcher.getIndexReader().isCurrent()) {
            searcher.close();
            searcher = new IndexSearcher(directory);
        }
        return searcher;
    }

    /**
     * Loads the search properties from the <tt>indexprops.xml</tt> file in the given
     * directory and stores them in the <tt>indexProperties</tt> field. If the file does
     * not exist yet, an attempt is made to create it with an empty <tt>search</tt> root
     * element first.
     *
     * @param searchDir the directory that holds the search index.
     * @throws IOException if the properties file cannot be loaded.
     */
    private void loadPropertiesFile(File searchDir) throws IOException {
        File indexPropertiesFile = new File(searchDir, "indexprops.xml");

        // Make sure the file actually exists. If it doesn't, a new file will be created.
        if (!indexPropertiesFile.exists()) {
            org.dom4j.Document doc = DocumentFactory.getInstance().createDocument(
                    DocumentFactory.getInstance().createElement("search"));
            // Now, write out to the file.
            Writer out = null;
            try {
                // Use dom4j's XMLWriter to do the writing and formatting.
                out = new FileWriter(indexPropertiesFile);
                XMLWriter outputter = new XMLWriter(out, OutputFormat.createPrettyPrint());
                outputter.write(doc);
                outputter.flush();
            }
            catch (Exception e) {
                Log.error(e.getMessage(), e);
            }
            finally {
                try {
                    if (out != null) {
                        out.close();
                    }
                }
                catch (Exception e) {
                    // Ignore: a write failure has already been logged above and nothing
                    // more can be done if closing fails.
                }
            }
        }
        indexProperties = new XMLProperties(indexPropertiesFile);
    }

    /**
     * A Future class to track the status of index rebuilding. Unlike a regular Future, the
     * <tt>get</tt> methods never block: they return the percentage of work done so far.
     */
    private static class RebuildFuture implements Future<Integer> {

        // Written by the rebuild thread, read by whoever polls the progress.
        private volatile int percentageDone = 0;

        public boolean cancel(boolean mayInterruptIfRunning) {
            // Don't allow cancels.
            return false;
        }

        public boolean isCancelled() {
            return false;
        }

        public boolean isDone() {
            return percentageDone == 100;
        }

        public Integer get() throws InterruptedException, ExecutionException {
            return percentageDone;
        }

        public Integer get(long timeout, TimeUnit unit) throws InterruptedException,
                ExecutionException, TimeoutException
        {
            return percentageDone;
        }

        /**
         * Sets the percentage done.
         *
         * @param percentageDone the percentage done (0 through 100).
         * @throws IllegalArgumentException if the value is outside of the 0-100 range.
         */
        public void setPercentageDone(int percentageDone) {
            if (percentageDone < 0 || percentageDone > 100) {
                throw new IllegalArgumentException("Invalid value: " + percentageDone);
            }
            this.percentageDone = percentageDone;
        }
    }
}