/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.exoplatform.services.jcr.impl.core.query.lucene;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.exoplatform.services.jcr.impl.core.query.IndexerIoMode;
import org.exoplatform.services.jcr.impl.core.query.IndexerIoModeHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.StringReader;
import java.util.BitSet;
import java.util.Iterator;
/**
* Implements common functionality for a lucene index.
* <p/>
* Note on synchronization: This class is not entirely thread-safe. Certain
* concurrent access is however allowed. Read-only access to this index using
* {@link #getReadOnlyIndexReader()} is thread-safe. That is, multiple threads
* may call that method concurrently and use the returned IndexReader at the
* same time.<br/>
* Modifying threads must be synchronized externally such that only one thread
* at a time uses the IndexReader and IndexWriter instances returned by
* {@link #getIndexReader()} and {@link #getIndexWriter()}.<br/>
* Concurrent access by <b>one</b> modifying thread and multiple read-only
* threads is safe!
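* <p/>
* A minimal sketch of the intended usage pattern (the <code>index</code>
* variable and the query code are illustrative only):
* <pre>
* // single modifying thread
* index.addDocuments(documents);
* index.commit();
*
* // concurrently: any number of read-only search threads
* ReadOnlyIndexReader reader = index.getReadOnlyIndexReader();
* try
* {
*    // execute queries against the reader
* }
* finally
* {
*    reader.release();
* }
* </pre>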
*/
abstract class AbstractIndex
{
/** The logger instance for this class */
private static final Logger log = LoggerFactory.getLogger("exo.jcr.component.core.AbstractIndex");
/** PrintStream that pipes all calls to println(String) into log.debug() */
private static final LoggingPrintStream STREAM_LOGGER = new LoggingPrintStream();
/** Executor with a pool size equal to the number of available processors */
private static final DynamicPooledExecutor EXECUTOR = new DynamicPooledExecutor();
/** The currently set IndexWriter or <code>null</code> if none is set */
private IndexWriter indexWriter;
/** The currently set IndexReader or <code>null</code> if none is set */
private CommittableIndexReader indexReader;
/** The underlying Directory where the index is stored */
private Directory directory;
/** Analyzer we use to tokenize text */
private Analyzer analyzer;
/** The similarity in use for indexing and searching. */
private final Similarity similarity;
/** Compound file flag */
private boolean useCompoundFile = true;
/** maxFieldLength config parameter */
private int maxFieldLength = SearchIndex.DEFAULT_MAX_FIELD_LENGTH;
/** termInfosIndexDivisor config parameter */
private int termInfosIndexDivisor = SearchIndex.DEFAULT_TERM_INFOS_INDEX_DIVISOR;
/**
* The document number cache if this index may use one.
*/
private DocNumberCache cache;
/** The shared IndexReader for all read-only IndexReaders */
private SharedIndexReader sharedReader;
/**
* The most recent read-only reader if there is any.
*/
private ReadOnlyIndexReader readOnlyReader;
/**
* The indexing queue.
*/
private IndexingQueue indexingQueue;
/**
* Flag that indicates whether there was an index present in the directory
* when this AbstractIndex was created.
*/
private boolean isExisting;
protected final IndexerIoModeHandler modeHandler;
/**
* Constructs an index with an <code>analyzer</code> and a
* <code>directory</code>.
*
* @param analyzer the analyzer for text tokenizing.
* @param similarity the similarity implementation.
* @param directory the underlying directory.
* @param cache the document number cache if this index should use
* one; otherwise <code>cache</code> is
* <code>null</code>.
* @param indexingQueue the indexing queue.
* @throws IOException if the index cannot be initialized.
*/
AbstractIndex(final Analyzer analyzer, Similarity similarity, final Directory directory, DocNumberCache cache,
IndexingQueue indexingQueue, IndexerIoModeHandler modeHandler) throws IOException
{
this.analyzer = analyzer;
this.similarity = similarity;
this.directory = directory;
this.cache = cache;
this.indexingQueue = indexingQueue;
this.modeHandler = modeHandler;
this.isExisting = IndexReader.indexExists(directory);
if (!isExisting)
{
indexWriter = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.LIMITED);
// immediately close, now that index has been created
indexWriter.close();
indexWriter = null;
}
}
/**
* Default implementation returns the same instance as passed
* in the constructor.
*
* @return the directory instance passed in the constructor
*/
Directory getDirectory()
{
return directory;
}
/**
* Returns <code>true</code> if this index was opened on a directory with
* an existing index in it; <code>false</code> otherwise.
*
* @return <code>true</code> if there was an index present when this index
* was created; <code>false</code> otherwise.
*/
boolean isExisting()
{
return isExisting;
}
/**
* Adds documents to this index and invalidates the shared reader. If an
* error occurs while inverting an individual document, indexing continues
* with the remaining documents and the first exception encountered is
* re-thrown after all documents have been processed.
*
* @param docs the documents to add.
* @throws IOException if an error occurs while writing to the index.
*/
void addDocuments(final Document[] docs) throws IOException
{
final IndexWriter writer = getIndexWriter();
IOException ioExc = null;
try
{
for (Document doc : docs)
{
try
{
writer.addDocument(getFinishedDocument(doc));
}
catch (Throwable e)
{
if (ioExc == null)
{
if (e instanceof IOException)
{
ioExc = (IOException)e;
}
else
{
ioExc = Util.createIOException(e);
}
}
log.warn("Exception while inverting document", e);
}
}
}
finally
{
invalidateSharedReader();
}
if (ioExc != null)
{
throw ioExc;
}
}
/**
* Removes the document from this index. This call will not invalidate
* the shared reader. If a subclass wishes to do so, it should override
* this method and call {@link #invalidateSharedReader()}.
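* <p/>
* A hedged sketch of a typical call; <code>FieldNames.UUID</code> is
* assumed to be the id field constant used elsewhere in this package:
* <pre>
* // assumption: FieldNames.UUID identifies the node document
* int deleted = removeDocument(new Term(FieldNames.UUID, uuid));
* </pre>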
*
* @param idTerm the id term of the document to remove.
* @throws IOException if an error occurs while removing the document.
* @return number of documents deleted
*/
int removeDocument(final Term idTerm) throws IOException
{
return getIndexReader().deleteDocuments(idTerm);
}
/**
* Returns an <code>IndexReader</code> on this index. This index reader
* may be used to delete documents. Any open <code>IndexWriter</code> is
* closed before the reader is obtained.
*
* @return an <code>IndexReader</code> on this index.
* @throws IOException if the reader cannot be obtained.
*/
protected synchronized CommittableIndexReader getIndexReader() throws IOException
{
if (indexWriter != null)
{
indexWriter.close();
log.debug("closing IndexWriter.");
indexWriter = null;
}
if (indexReader == null || !indexReader.isCurrent())
{
IndexReader reader = IndexReader.open(getDirectory(), null, false, termInfosIndexDivisor);
// if modeHandler != null and mode == READ_ONLY, the reader should support transient deletions.
// This is used to transiently update the reader in a clustered environment when some documents
// have been deleted. If the index reader is not null and already contains transient deletions
// but is no longer current, it is re-created and loses those deletions. They will already have
// been applied by the coordinator node in the cluster, so there is no need to inject them into
// the new reader.
indexReader =
new CommittableIndexReader(reader, modeHandler != null && modeHandler.getMode() == IndexerIoMode.READ_ONLY);
}
return indexReader;
}
/**
* Returns a read-only index reader that can be used concurrently with
* other threads writing to this index. The returned index reader is
* read-only, that is, any attempt to delete a document from the index
* will throw an <code>UnsupportedOperationException</code>.
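* <p/>
* The returned reader has already been acquired for the caller; it must be
* released again when no longer needed (a minimal sketch, the query code
* is illustrative):
* <pre>
* ReadOnlyIndexReader reader = getReadOnlyIndexReader(true);
* try
* {
*    // execute queries against the reader
* }
* finally
* {
*    reader.release();
* }
* </pre>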
*
* @param initCache if the caches in the index reader should be initialized
* before the index reader is returned.
* @return a read-only index reader.
* @throws IOException if an error occurs while obtaining the index reader.
*/
synchronized ReadOnlyIndexReader getReadOnlyIndexReader(final boolean initCache) throws IOException
{
// get current modifiable index reader
CommittableIndexReader modifiableReader = getIndexReader();
long modCount = modifiableReader.getModificationCount();
if (readOnlyReader != null)
{
if (readOnlyReader.getDeletedDocsVersion() == modCount)
{
// reader up-to-date
readOnlyReader.acquire();
return readOnlyReader;
}
else
{
// reader outdated
if (readOnlyReader.getRefCount() == 1)
{
// not in use, except by this index
// update the reader
readOnlyReader.updateDeletedDocs(modifiableReader);
readOnlyReader.acquire();
return readOnlyReader;
}
else
{
// cannot update reader, it is still in use
// need to create a new instance
readOnlyReader.release();
readOnlyReader = null;
}
}
}
// if we get here there is no up-to-date read-only reader
// capture snapshot of deleted documents
BitSet deleted = new BitSet(modifiableReader.maxDoc());
for (int i = 0; i < modifiableReader.maxDoc(); i++)
{
if (modifiableReader.isDeleted(i))
{
deleted.set(i);
}
}
if (sharedReader == null)
{
// create new shared reader
IndexReader reader = IndexReader.open(getDirectory(), null, true, termInfosIndexDivisor);
CachingIndexReader cr = new CachingIndexReader(reader, cache, initCache);
sharedReader = new SharedIndexReader(cr);
}
readOnlyReader = new ReadOnlyIndexReader(sharedReader, deleted, modCount);
readOnlyReader.acquire();
return readOnlyReader;
}
/**
* Returns a read-only index reader that can be used concurrently with
* other threads writing to this index. The returned index reader is
* read-only, that is, any attempt to delete a document from the index
* will throw an <code>UnsupportedOperationException</code>. This method
* is equivalent to calling {@link #getReadOnlyIndexReader(boolean)} with
* <code>initCache</code> set to <code>false</code>.
*
* @return a read-only index reader.
* @throws IOException if an error occurs while obtaining the index reader.
*/
protected ReadOnlyIndexReader getReadOnlyIndexReader() throws IOException
{
return getReadOnlyIndexReader(false);
}
/**
* Returns an <code>IndexWriter</code> on this index. Any open
* <code>IndexReader</code> is closed before the writer is obtained.
* @return an <code>IndexWriter</code> on this index.
* @throws IOException if the writer cannot be obtained.
*/
protected synchronized IndexWriter getIndexWriter() throws IOException
{
if (indexReader != null)
{
indexReader.close();
log.debug("closing IndexReader.");
indexReader = null;
}
if (indexWriter == null)
{
indexWriter = new IndexWriter(getDirectory(), analyzer, new IndexWriter.MaxFieldLength(maxFieldLength));
indexWriter.setSimilarity(similarity);
indexWriter.setUseCompoundFile(useCompoundFile);
indexWriter.setInfoStream(STREAM_LOGGER);
}
return indexWriter;
}
/**
* Commits all pending changes to the underlying <code>Directory</code>.
* @throws IOException if an error occurs while committing changes.
*/
protected void commit() throws IOException
{
commit(false);
}
/**
* Commits all pending changes to the underlying <code>Directory</code>.
*
* @param optimize if <code>true</code> the index is optimized after the
* commit.
* @throws IOException if an error occurs while committing changes.
*/
protected synchronized void commit(final boolean optimize) throws IOException
{
if (indexReader != null)
{
log.debug("committing IndexReader.");
indexReader.flush();
}
if (indexWriter != null)
{
log.debug("committing IndexWriter.");
indexWriter.commit();
}
// optimize if requested
if (optimize)
{
IndexWriter writer = getIndexWriter();
writer.optimize();
writer.close();
indexWriter = null;
}
}
/**
* Closes this index, releasing all held resources.
*/
synchronized void close()
{
releaseWriterAndReaders();
if (directory != null)
{
try
{
directory.close();
}
catch (IOException e)
{
log.warn("Exception closing directory: " + e.toString());
}
directory = null;
}
}
/**
* Releases all potentially held index writer and readers.
*/
protected void releaseWriterAndReaders()
{
if (indexWriter != null)
{
try
{
indexWriter.close();
}
catch (IOException e)
{
log.warn("Exception closing index writer: " + e.toString());
}
indexWriter = null;
}
if (indexReader != null)
{
try
{
indexReader.close();
}
catch (IOException e)
{
log.warn("Exception closing index reader: " + e.toString());
}
indexReader = null;
}
if (readOnlyReader != null)
{
try
{
readOnlyReader.release();
}
catch (IOException e)
{
log.warn("Exception closing index reader: " + e.toString());
}
readOnlyReader = null;
}
if (sharedReader != null)
{
try
{
sharedReader.release();
}
catch (IOException e)
{
log.warn("Exception closing index reader: " + e.toString());
}
sharedReader = null;
}
}
/**
* @return the number of bytes this index occupies in memory.
*/
synchronized long getRamSizeInBytes()
{
if (indexWriter != null)
{
return indexWriter.ramSizeInBytes();
}
else
{
return 0;
}
}
/**
* Releases the shared reader and any current read-only reader.
*
* @throws IOException if an error occurs while closing the reader.
*/
protected synchronized void invalidateSharedReader() throws IOException
{
// also close the read-only reader
if (readOnlyReader != null)
{
readOnlyReader.release();
readOnlyReader = null;
}
// invalidate shared reader
if (sharedReader != null)
{
sharedReader.release();
sharedReader = null;
}
}
/**
* Returns a document that is finished with text extraction and is ready to
* be added to the index.
*
* @param doc the document to check.
* @return <code>doc</code> if it is finished already, or a stripped-down
* copy of <code>doc</code> with the text extractor fields replaced
* by empty readers; in that case the original document is put on
* the indexing queue for later indexing.
* @throws IOException if the document cannot be added to the indexing
* queue.
*/
private Document getFinishedDocument(Document doc) throws IOException
{
if (!Util.isDocumentReady(doc))
{
Document copy = new Document();
// mark the document as requiring reindexing
copy.add(new Field(FieldNames.REINDEXING_REQUIRED, "", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
Iterator fields = doc.getFields().iterator();
while (fields.hasNext())
{
Fieldable f = (Fieldable)fields.next();
Fieldable field = null;
Field.TermVector tv = getTermVectorParameter(f);
Field.Store stored = getStoreParameter(f);
Field.Index indexed = getIndexParameter(f);
if (f instanceof LazyTextExtractorField || f.readerValue() != null)
{
// replace all readers with empty string reader
field = new Field(f.name(), new StringReader(""), tv);
}
else if (f.stringValue() != null)
{
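// string values are copied along with their store/index/term-vector settings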
field = new Field(f.name(), f.stringValue(), stored, indexed, tv);
}
else if (f.isBinary())
{
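// binary values are copied as stored, non-indexed content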
field = new Field(f.name(), f.binaryValue(), stored);
}
if (field != null)
{
field.setOmitNorms(f.getOmitNorms());
copy.add(field);
}
}
// schedule the original document for later indexing
Document existing = indexingQueue.addDocument(doc);
if (existing != null)
{
// the queue already contained a pending document for this
// node. -> dispose the document
Util.disposeDocument(existing);
}
// use the stripped down copy for now
doc = copy;
}
return doc;
}
//-------------------------< properties >-----------------------------------
/**
* The lucene index writer property: useCompoundFile
*/
void setUseCompoundFile(boolean b)
{
useCompoundFile = b;
if (indexWriter != null)
{
indexWriter.setUseCompoundFile(b);
}
}
/**
* The lucene index writer property: maxFieldLength
*/
void setMaxFieldLength(int maxFieldLength)
{
this.maxFieldLength = maxFieldLength;
if (indexWriter != null)
{
indexWriter.setMaxFieldLength(maxFieldLength);
}
}
/**
* @return the current value for termInfosIndexDivisor.
*/
public int getTermInfosIndexDivisor()
{
return termInfosIndexDivisor;
}
/**
* Sets a new value for termInfosIndexDivisor.
*
* @param termInfosIndexDivisor the new value.
*/
public void setTermInfosIndexDivisor(int termInfosIndexDivisor)
{
this.termInfosIndexDivisor = termInfosIndexDivisor;
}
//------------------------------< internal >--------------------------------
/**
* Returns the index parameter set on <code>f</code>.
*
* @param f a lucene field.
* @return the index parameter on <code>f</code>.
*/
private Field.Index getIndexParameter(Fieldable f)
{
if (!f.isIndexed())
{
return Field.Index.NO;
}
else if (f.isTokenized())
{
return Field.Index.ANALYZED;
}
else
{
return Field.Index.NOT_ANALYZED;
}
}
/**
* Returns the store parameter set on <code>f</code>.
*
* @param f a lucene field.
* @return the store parameter on <code>f</code>.
*/
private Field.Store getStoreParameter(Fieldable f)
{
if (f.isCompressed())
{
return Field.Store.COMPRESS;
}
else if (f.isStored())
{
return Field.Store.YES;
}
else
{
return Field.Store.NO;
}
}
/**
* Returns the term vector parameter set on <code>f</code>.
*
* @param f a lucene field.
* @return the term vector parameter on <code>f</code>.
*/
private Field.TermVector getTermVectorParameter(Fieldable f)
{
if (f.isStorePositionWithTermVector() && f.isStoreOffsetWithTermVector())
{
return Field.TermVector.WITH_POSITIONS_OFFSETS;
}
else if (f.isStorePositionWithTermVector())
{
return Field.TermVector.WITH_POSITIONS;
}
else if (f.isStoreOffsetWithTermVector())
{
return Field.TermVector.WITH_OFFSETS;
}
else if (f.isTermVectorStored())
{
return Field.TermVector.YES;
}
else
{
return Field.TermVector.NO;
}
}
/**
* Adapter to pipe info messages from lucene into log messages.
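* <p/>
* The shared {@link #STREAM_LOGGER} instance is wired into lucene in
* {@link #getIndexWriter()}:
* <pre>
* indexWriter.setInfoStream(STREAM_LOGGER);
* </pre>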
*/
private static final class LoggingPrintStream extends PrintStream
{
/** Buffer print calls until a newline is written */
private StringBuffer buffer = new StringBuffer();
public LoggingPrintStream()
{
super(new OutputStream()
{
@Override
public void write(int b)
{
// do nothing
}
});
}
@Override
public void print(String s)
{
buffer.append(s);
}
@Override
public void println(String s)
{
buffer.append(s);
log.debug(buffer.toString());
buffer.setLength(0);
}
}
}