Package org.hibernate.search.backend.impl.lucene

Source Code of org.hibernate.search.backend.impl.lucene.NRTWorkspaceImpl$FlushStrategySelector

/*
* Hibernate Search, full-text search for your domain model
*
* License: GNU Lesser General Public License (LGPL), version 2.1 or later
* See the lgpl.txt file in the root directory or <http://www.gnu.org/licenses/lgpl-2.1.html>.
*/
package org.hibernate.search.backend.impl.lucene;

import java.io.IOException;
import java.util.Properties;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;

import org.hibernate.search.backend.impl.CommitPolicy;
import org.hibernate.search.exception.AssertionFailure;
import org.hibernate.search.backend.AddLuceneWork;
import org.hibernate.search.backend.DeleteLuceneWork;
import org.hibernate.search.backend.FlushLuceneWork;
import org.hibernate.search.backend.LuceneWork;
import org.hibernate.search.backend.OptimizeLuceneWork;
import org.hibernate.search.backend.PurgeAllLuceneWork;
import org.hibernate.search.backend.UpdateLuceneWork;
import org.hibernate.search.backend.impl.WorkVisitor;
import org.hibernate.search.indexes.impl.DirectoryBasedIndexManager;
import org.hibernate.search.indexes.spi.DirectoryBasedReaderProvider;
import org.hibernate.search.spi.WorkerBuildContext;
import org.hibernate.search.util.logging.impl.Log;
import org.hibernate.search.util.logging.impl.LoggerFactory;

/**
* A {@code Workspace} implementation taking advantage of NRT Lucene features.
* {@code IndexReader} instances are obtained directly from the {@code IndexWriter}, which is not forced
* to flush all pending changes to the {@code Directory} structure.
* <p/>
*
* Lucene requires in its current version to flush delete operations, or the {@code IndexReader}s
* retrieved via NRT will include deleted Document instances in queries; flushing delete operations
* happens to be quite expensive so this {@code Workspace} implementation attempts to detect when such
* a flush operation is needed.
* <p/>
*
* Applying write operations flags "indexReader requirements" with needs for either normal flush
* or flush including deletes, but does not update {@code IndexReader} instances. The {@code IndexReader}s
* are updated only if and when a fresh {@code IndexReader} is requested via {@link #openIndexReader()}.
* This method will check if it can return the last opened {@code IndexReader} or in case of the reader being stale
* open a fresh reader from the current {@code IndexWriter}.
* <p/>
*
* Generation counters are used to track need-at-least version versus last-updated-at version:
* shared state is avoided between index writers and reader threads to avoid high complexity.
* The method {@link #afterTransactionApplied(boolean, boolean)} might trigger multiple times flagging
* the index to be dirty without triggering an actual {@code IndexReader} refresh, so the version counters
* can have gaps: method {@link #refreshReaders()} will always jump to latest seen version, as it will
* refresh the index to satisfy both kinds of flush requirements (writes and deletes).
* <p/>
*
* We keep a reference {@code IndexReader} in the {@link #currentReader} atomic reference as a fast path
* for multiple read events when the index is not dirty.
* <p/>
*
* This class implements both {@code Workspace} and {@code ReaderProvider}.
*
* @author Sanne Grinovero <sanne@hibernate.org> (C) 2011 Red Hat Inc.
*/
public class NRTWorkspaceImpl extends AbstractWorkspaceImpl implements DirectoryBasedReaderProvider {

  private static final Log log = LoggerFactory.make();

  private final ReentrantLock writeLock = new ReentrantLock();
  private final AtomicReference<DirectoryReader> currentReader = new AtomicReference<DirectoryReader>();
  private final CommitPolicy commitPolicy = new NRTCommitPolicy( writerHolder );

  /**
   * Visits {@code LuceneWork} types to determine the kind of flushing we need to apply on the indexes
   */
  private final FlushStrategySelector flushStrategySelector = new FlushStrategySelector();

  /**
   * Set to true when this service is shutdown (not revertible)
   */
  private boolean shutdown = false;

  /**
   * When true a flush operation should make sure all write operations are flushed,
   * otherwise a simpler flush strategy can be picked.
   */
  private final AtomicBoolean needFlushWrites = new AtomicBoolean( true );

  /**
   * Often when flushing deletes don't need to be applied. Some operation might have requested otherwise:
   */
  private final AtomicBoolean needFlushDeletes = new AtomicBoolean( false );

  /**
   * Internal counter used to mark different generations of IndexReaders. Monotonic incremental.
   * Not expecting an overflow in this planet's lifetime.
   */
  private final AtomicLong readerGeneration = new AtomicLong( 0 );

  /**
   * When refreshing an {@code IndexReader} to achieve a fresh snapshot to a generation, we need to check this
   * value to see if deletions need to be flushed. We try hard to not flush deletions as that is a
   * very expensive operation.
   * NOTE: concurrently accessed. Guarded by readerGenRequiringFlushWrites: read the other first, write it last.
   */
  private long readerGenRequiringFlushDeletes = 0;

  /**
   * As with {@link #readerGenRequiringFlushDeletes}, if this value is above the value of {@link #currentReaderGen}
   * a new {@code IndexReader} should be opened as the current generation is stale.
   */
  private volatile long readerGenRequiringFlushWrites = 0;

  /**
   * Generation identifier of the current open {@code IndexReader} (the one stored in {@link #currentReader}
   */
  private volatile long currentReaderGen = 0;

  public NRTWorkspaceImpl(DirectoryBasedIndexManager indexManager, WorkerBuildContext buildContext, Properties cfg) {
    super( indexManager, buildContext, cfg );
  }

  @Override
  public void afterTransactionApplied(boolean someFailureHappened, boolean streaming) {
    commitPolicy.onChangeSetApplied( someFailureHappened, streaming );
    if ( ! streaming ) {
      setupNewReadersRequirements();
    }
  }

  /**
   * Translates fields as{@code needFlushWrites} and {@code needFlushDeletes} in a set of requirements as checked
   * by reader threads. This is commonly invoked by a single thread (so no contention on this method
   * is expected) but it needs to expose a consistent view of the written fields to {@link #refreshReaders()}.
   * This is normally not invoked in parallel by multiple threads as the backend design allows a single working thread
   * per index, but it could be invoked concurrently when streaming work is being applied (when a MassIndexer is
   * running). Note that multiple threads invoking this in parallel might result in skipping some sequence numbers
   * but that's not a problem.
   */
  private void setupNewReadersRequirements() {
    if ( needFlushDeletes.get() || needFlushWrites.get() ) {
      final long nextGenId = readerGeneration.incrementAndGet();
      if ( needFlushDeletes.get() ) {
        this.needFlushDeletes.lazySet( false ); //flushed by volatile write at end of method
        this.readerGenRequiringFlushDeletes = nextGenId; //flushed by volatile write at end of method
      }
      this.needFlushWrites.lazySet( false ); //flushed by volatile write at end of method
      this.readerGenRequiringFlushWrites = nextGenId;
    }
  }

  /**
   * Invoked when a refresh of current {@code IndexReader}s is detected necessary.
   *
   * The implementation is blocking to maximize reuse of a single {@code IndexReader} (better for buffer usage,
   * caching, ..) and to avoid multiple threads trying and opening the same resources at the same time.
   *
   * @return the refreshed {@code IndexReader}
   */
  private synchronized DirectoryReader refreshReaders() {
    //double-check for the case we don't need anymore to refresh
    if ( indexReaderIsFresh() ) {
      return currentReader.get();
    }
    //order of the following two reads DOES matter:
    final long readerGenRequiringFlushWrites = this.readerGenRequiringFlushWrites;
    final long readerGenRequiringFlushDeletes = this.readerGenRequiringFlushDeletes;
    final boolean flushDeletes = currentReaderGen < readerGenRequiringFlushDeletes;
    final long openingGen = Math.max( readerGenRequiringFlushDeletes, readerGenRequiringFlushWrites );

    final DirectoryReader newIndexReader = writerHolder.openNRTIndexReader( flushDeletes );
    final DirectoryReader oldReader = currentReader.getAndSet( newIndexReader );
    this.currentReaderGen = openingGen;
    try {
      if ( oldReader != null ) {
        oldReader.decRef();
      }
    }
    catch (IOException e) {
      log.unableToCloseLuceneIndexReader( e );
    }
    return newIndexReader;
  }

  private boolean indexReaderIsFresh() {
    final long currentReaderGen = this.currentReaderGen;
    //Note it reads the volatile first. These two longs are always updated in pairs.
    return currentReaderGen >= readerGenRequiringFlushWrites && currentReaderGen >= readerGenRequiringFlushDeletes;
  }

  @Override
  public DirectoryReader openIndexReader() {
    return openIndexReader( ! indexReaderIsFresh() );
  }

  /**
   * @param needRefresh when {@code false} it won't guarantee the index reader to be affected by "latest" changes
   * @return returns an {@code IndexReader} instance, either pooled or a new one
   */
  private DirectoryReader openIndexReader(final boolean needRefresh) {
    DirectoryReader indexReader;
    if ( needRefresh ) {
      indexReader = refreshReaders();
    }
    else {
      indexReader = currentReader.get();
    }
    if ( indexReader == null ) {
      writeLock.lock();
      try {
        if ( shutdown ) {
          throw new AssertionFailure( "IndexReader requested after ReaderProvider is shutdown" );
        }
        indexReader = currentReader.get();
        if ( indexReader == null ) {
          indexReader = writerHolder.openDirectoryIndexReader();
          currentReader.set( indexReader );
        }
      }
      finally {
        writeLock.unlock();
      }
    }
    if ( indexReader.tryIncRef() ) {
      return indexReader;
    }
    else {
      //In this case we have a race: the chosen IndexReader was closed before we could increment its reference, so we need
      //to try again. Basically an optimistic lock as the race condition is very unlikely.
      //Changes should be tested at least with ReadWriteParallelismTest (in the performance tests module).
      //In case new writes happened there is no need to refresh again.
      return openIndexReader( false );
    }
  }

  @Override
  public void closeIndexReader(IndexReader reader) {
    if ( reader == null ) {
      return;
    }
    try {
      //don't use IndexReader#close as it prevents further counter decrements!
      reader.decRef();
    }
    catch (IOException e) {
      log.unableToCloseLuceneIndexReader( e );
    }
  }

  @Override
  public void initialize(DirectoryBasedIndexManager indexManager, Properties props) {
  }

  @Override
  public void stop() {
    writeLock.lock();
    try {
      final IndexReader oldReader = currentReader.getAndSet( null );
      closeIndexReader( oldReader );
      shutdown = true;
    }
    finally {
      writeLock.unlock();
    }
  }

  @Override
  public void flush() {
    //Even if this is the NRT workspace, Flush is implemented as a real Flush to make sure
    //MassIndexer output is committed to permanent storage
    commitPolicy.onFlush();
  }

  @Override
  public void notifyWorkApplied(LuceneWork work) {
    incrementModificationCounter();
    work.getWorkDelegate( flushStrategySelector ).apply( this );
  }

  @Override
  public CommitPolicy getCommitPolicy() {
    return commitPolicy;
  }

  /**
   * Visits each kind of {@code LuceneWork} we're processing to define which kind
   * of flushing strategy we need to apply to create consistent index readers.
   */
  private static class FlushStrategySelector implements WorkVisitor<FlushStrategyNeed> {

    @Override
    public FlushStrategyNeed getDelegate(AddLuceneWork addLuceneWork) {
      return FlushStrategyNeed.FLUSH_WRITES;
    }

    @Override
    public FlushStrategyNeed getDelegate(DeleteLuceneWork deleteLuceneWork) {
      return FlushStrategyNeed.FLUSH_DELETIONS;
    }

    @Override
    public FlushStrategyNeed getDelegate(OptimizeLuceneWork optimizeLuceneWork) {
      return FlushStrategyNeed.NONE;
    }

    @Override
    public FlushStrategyNeed getDelegate(PurgeAllLuceneWork purgeAllLuceneWork) {
      return FlushStrategyNeed.FLUSH_DELETIONS;
    }

    @Override
    public FlushStrategyNeed getDelegate(UpdateLuceneWork updateLuceneWork) {
      return FlushStrategyNeed.FLUSH_WRITES_AND_DELETES;
    }

    @Override
    public FlushStrategyNeed getDelegate(FlushLuceneWork flushLuceneWork) {
      return FlushStrategyNeed.FLUSH_WRITES_AND_DELETES;
    }
  }

  private enum FlushStrategyNeed {
    NONE {
      @Override
      void apply(final NRTWorkspaceImpl workspace) {
      }
    },
    FLUSH_DELETIONS {
      @Override
      void apply(final NRTWorkspaceImpl workspace) {
        // AtomicBoolean#lazySet is good enough as we only want to provide reads consistent with the state
        // the application is expecting. If for example no other flush is happening down the road
        // (which will eventually flush this write too) we're fine for other cores to "see"
        // IndexReader instances slightly stale.
        workspace.needFlushDeletes.lazySet( true );
      }
    },
    FLUSH_WRITES {
      @Override
      void apply(final NRTWorkspaceImpl workspace) {
        //See FLUSH_DELETIONS for why #lazySet is good enough.
        workspace.needFlushWrites.lazySet( true );
      }
    },
    FLUSH_WRITES_AND_DELETES {
      @Override
      void apply(NRTWorkspaceImpl workspace) {
        FLUSH_DELETIONS.apply( workspace );
        FLUSH_WRITES.apply( workspace );
      }
    };
    abstract void apply(NRTWorkspaceImpl workspace);
  }

}
TOP

Related Classes of org.hibernate.search.backend.impl.lucene.NRTWorkspaceImpl$FlushStrategySelector

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.