Source Code of org.exoplatform.services.jcr.impl.core.query.lucene.CachingIndexReader$CacheInitializer

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.exoplatform.services.jcr.impl.core.query.lucene;


import org.apache.commons.collections.map.LRUMap;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FilterIndexReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


import java.io.IOException;
import java.text.NumberFormat;
import java.util.BitSet;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;


/**
 * Implements an <code>IndexReader</code> that maintains caches to resolve
 * {@link #getParent(int, BitSet)} calls, UUID-only document lookups and
 * UUID term lookups efficiently.
 */
class CachingIndexReader extends FilterIndexReader
{


   /**
    * The logger instance for this class.
    */
   private static final Logger log = LoggerFactory.getLogger("exo.jcr.component.core.CachingIndexReader");


   /**
    * The current value of the global creation tick counter. It is read and
    * incremented by {@link #getNextCreationTick()} while holding the lock on
    * the <code>CachingIndexReader</code> class.
    */
   private static long currentTick;


   /**
    * BitSet where bits that correspond to document numbers are set for
    * shareable nodes. Filled once in the constructor from the term docs of
    * the {@link FieldNames#SHAREABLE_NODE} field.
    */
   private final BitSet shareableNodes;


   /**
    * Cache of nodes parent relation. If an entry in the array is not null,
    * that means the node with the document number = array-index has the node
    * with <code>DocId</code> as parent. The array is sized to
    * <code>maxDoc()</code> of the wrapped reader and is written both by
    * {@link #getParent(int, BitSet)} and by the {@link CacheInitializer}.
    */
   private final DocId[] parents;


   /**
    * Initializes the {@link #parents} cache. Created in the constructor and
    * asked to stop when this reader is closed.
    */
   private CacheInitializer cacheInitializer;


   /**
    * Tick when this index reader was created. Compared against the creation
    * tick stored in a document number cache entry to tell whether the entry
    * was written by this reader.
    */
   private final long creationTick = getNextCreationTick();


   /**
    * Document number cache if available. May be <code>null</code>.
    */
   private final DocNumberCache cache;


   /**
    * Maps document number to node UUID. Backed by a synchronized LRU map that
    * holds 1% of <code>maxDoc()</code> entries, but at least 10.
    */
   private final Map<Integer, String> docNumber2uuid;


   /**
    * A cache of TermDocs that are regularly read from the index. Serves every
    * {@link #termDocs(Term)} request that is not answered through the
    * document number cache.
    */
   private final TermDocsCache termDocsCache;


   /**
    * Creates a new <code>CachingIndexReader</code> based on
    * <code>delegatee</code>
    *
    * @param delegatee the base <code>IndexReader</code>.
    * @param cache     a document number cache, or <code>null</code> if not
    *                  available to this reader.
    * @param initCache if the {@link #parents} cache should be initialized
    *                  when this index reader is constructed.
    * @throws IOException if an error occurs while reading from the index.
    */
   CachingIndexReader(IndexReader delegatee, DocNumberCache cache, boolean initCache) throws IOException
   {
      super(delegatee);
      this.cache = cache;
      this.parents = new DocId[delegatee.maxDoc()];
      this.shareableNodes = new BitSet();
      TermDocs tDocs = delegatee.termDocs(new Term(FieldNames.SHAREABLE_NODE, ""));
      try
      {
         while (tDocs.next())
         {
            shareableNodes.set(tDocs.doc());
         }
      }
      finally
      {
         tDocs.close();
      }
      this.cacheInitializer = new CacheInitializer(delegatee);
      if (initCache)
      {
         cacheInitializer.run();
      }
      // limit cache to 1% of maxDoc(), but at least 10.
      this.docNumber2uuid = Collections.synchronizedMap(new LRUMap(Math.max(10, delegatee.maxDoc() / 100)));
      this.termDocsCache = new TermDocsCache(delegatee, FieldNames.PROPERTIES);
   }


   /**
    * Returns the <code>DocId</code> of the parent of <code>n</code> or
    * {@link DocId#NULL} if <code>n</code> does not have a parent
    * (<code>n</code> is the root node).
    *
    * @param n the document number.
    * @param deleted the documents that should be regarded as deleted.
    * @return the <code>DocId</code> of <code>n</code>'s parent.
    * @throws IOException if an error occurs while reading from the index.
    */
   DocId getParent(int n, BitSet deleted) throws IOException
   {
      DocId parent;
      boolean existing = false;
      parent = parents[n];


      if (parent != null)
      {
         existing = true;


         // check if valid and reset if necessary
         if (!parent.isValid(deleted))
         {
            if (log.isDebugEnabled())
            {
               log.debug(parent + " not valid anymore.");
            }
            parent = null;
         }
      }


      if (parent == null)
      {
         Document doc = document(n, FieldSelectors.UUID_AND_PARENT);
         String[] parentUUIDs = doc.getValues(FieldNames.PARENT);
         if (parentUUIDs.length == 0 || parentUUIDs[0].length() == 0)
         {
            // root node
            parent = DocId.NULL;
         }
         else
         {
            if (shareableNodes.get(n))
            {
               parent = DocId.create(parentUUIDs);
            }
            else
            {
               if (!existing)
               {
                  Term id = new Term(FieldNames.UUID, parentUUIDs[0]);
                  TermDocs docs = termDocs(id);
                  try
                  {
                     while (docs.next())
                     {
                        if (!deleted.get(docs.doc()))
                        {
                           parent = DocId.create(docs.doc());
                           break;
                        }
                     }
                  }
                  finally
                  {
                     docs.close();
                  }
               }
               // if still null, then parent is not in this index, or existing
               // DocId was invalid. thus, only allowed to create DocId from uuid
               if (parent == null)
               {
                  parent = DocId.create(parentUUIDs[0]);
               }
            }
         }


         // finally put to cache
         parents[n] = parent;
      }
      return parent;
   }


   /**
    * Returns the tick value when this reader was created.
    *
    * @return the creation tick for this reader.
    */
   public long getCreationTick()
   {
      return creationTick;
   }


   //--------------------< FilterIndexReader overrides >-----------------------


   /**
    * Uses the {@link #docNumber2uuid} cache for document lookups that are only
    * interested in the {@link FieldSelectors#UUID}.
    *
    * @param n the document number.
    * @param fieldSelector the field selector.
    * @return the document.
    * @throws CorruptIndexException if the index is corrupt.
    * @throws IOException if an error occurs while reading from the index.
    */
   public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException
   {
      if (fieldSelector == FieldSelectors.UUID)
      {
         Integer docNum = new Integer(n);
         Document doc;
         String uuid = docNumber2uuid.get(docNum);
         if (uuid == null)
         {
            doc = super.document(n, fieldSelector);
            uuid = doc.get(FieldNames.UUID);
            docNumber2uuid.put(docNum, uuid);
         }
         else
         {
            doc = new Document();
            doc.add(new Field(FieldNames.UUID, uuid.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
         }
         return doc;
      }
      else
      {
         return super.document(n, fieldSelector);
      }
   }


   /**
    * If the field of <code>term</code> is {@link FieldNames#UUID} this
    * <code>CachingIndexReader</code> returns a <code>TermDocs</code> instance
    * with a cached document id. If <code>term</code> has any other field
    * the call is delegated to the base <code>IndexReader</code>.<br/>
    * If <code>term</code> is for a {@link FieldNames#UUID} field and this
    * <code>CachingIndexReader</code> does not have such a document,
    * {@link EmptyTermDocs#INSTANCE} is returned.
    *
    * @param term the term to start the <code>TermDocs</code> enumeration.
    * @return a TermDocs instance.
    * @throws IOException if an error occurs while reading from the index.
    */
   public TermDocs termDocs(Term term) throws IOException
   {
      if (term!=null && term.field() == FieldNames.UUID)
      {
         // check cache if we have one
         if (cache != null)
         {
            DocNumberCache.Entry e = cache.get(term.text());
            if (e != null)
            {
               // check if valid
               // the cache may contain entries from a different reader
               // with the same uuid. that happens when a node is updated
               // and is reindexed. the node 'travels' from an older index
               // to a newer one. the cache will still contain a cache
               // entry from the old until it is overwritten by the
               // newer index.
               if (e.creationTick == creationTick && !isDeleted(e.doc))
               {
                  return new SingleTermDocs(e.doc);
               }
            }


            // not in cache or invalid
            TermDocs docs = in.termDocs(term);
            try
            {
               if (docs.next())
               {
                  // put to cache
                  cache.put(term.text(), this, docs.doc());
                  // and return
                  return new SingleTermDocs(docs.doc());
               }
               else
               {
                  return EmptyTermDocs.INSTANCE;
               }
            }
            finally
            {
               docs.close();
            }
         }
      }
      return termDocsCache.termDocs(term);
   }


   /**
    * {@inheritDoc}
    */
   protected void doClose() throws IOException
   {
      try
      {
         cacheInitializer.waitUntilStopped();
      }
      catch (InterruptedException e)
      {
         // ignore
      }
      super.doClose();
   }


   //----------------------< internal >----------------------------------------


   /**
    * Returns the next creation tick value.
    *
    * @return the next creation tick value.
    */
   private static long getNextCreationTick()
   {
      synchronized (CachingIndexReader.class)
      {
         return currentTick++;
      }
   }


   /**
    * Initializes the {@link CachingIndexReader#parents} cache.
    */
   private class CacheInitializer implements Runnable
   {


      /**
       * From where to read. This is the reader that gets passed to
       * {@link #initializeParents(IndexReader)} by {@link #run()}.
       */
      private final IndexReader reader;


      /**
       * Set to <code>true</code> while this initializer does its work.
       * Written by {@link #run()} and read by {@link #waitUntilStopped()},
       * in both cases while synchronized on this initializer.
       */
      private boolean running = false;


      /**
       * Set to <code>true</code> when this index reader is about to be closed.
       * Set by {@link #waitUntilStopped()} and polled by the initialization
       * code, which gives up once it sees the flag.
       */
      private volatile boolean stopRequested = false;


      /**
       * Creates a new initializer with the given <code>reader</code>. No work
       * is done until {@link #run()} is called.
       *
       * @param reader an index reader.
       */
      public CacheInitializer(IndexReader reader)
      {
         this.reader = reader;
      }


      /**
       * Initializes the cache.
       */
      public void run()
      {
         synchronized (this)
         {
            running = true;
         }
         try
         {
            if (stopRequested)
            {
               // immediately return when stop is requested
               return;
            }
            initializeParents(reader);
         }
         catch (Exception e)
         {
            // only log warn message during regular operation
            if (!stopRequested)
            {
               log.warn("Error initializing parents cache.", e);
            }
         }
         finally
         {
            synchronized (this)
            {
               running = false;
               notifyAll();
            }
         }
      }


      /**
       * Waits until this cache initializer is stopped.
       *
       * @throws InterruptedException if the current thread is interrupted.
       */
      public void waitUntilStopped() throws InterruptedException
      {
         stopRequested = true;
         synchronized (this)
         {
            while (running)
            {
               wait();
            }
         }
      }


      /**
       * Initializes the {@link CachingIndexReader#parents} <code>DocId</code>
       * array.
       *
       * @param reader the underlying index reader.
       * @throws IOException if an error occurs while reading from the index.
       */
      private void initializeParents(IndexReader reader) throws IOException
      {
         long time = System.currentTimeMillis();
         final Map docs = new HashMap();
         // read UUIDs
         collectTermDocs(reader, new Term(FieldNames.UUID, ""), new TermDocsCollector()
         {
            public void collect(Term term, TermDocs tDocs) throws IOException
            {
               String uuid = term.text();
               while (tDocs.next())
               {
                  int doc = tDocs.doc();
                  // skip shareable nodes
                  if (!shareableNodes.get(doc))
                  {
                     NodeInfo info = new NodeInfo(doc, uuid);
                     docs.put(new Integer(doc), info);
                  }
               }
            }
         });


         // read PARENTs
         collectTermDocs(reader, new Term(FieldNames.PARENT, "0"), new TermDocsCollector()
         {
            public void collect(Term term, TermDocs tDocs) throws IOException
            {
               String uuid = term.text();
               while (tDocs.next())
               {
                  Integer docId = new Integer(tDocs.doc());
                  NodeInfo info = (NodeInfo)docs.get(docId);
                  if (info == null)
                  {
                     // shareable node, see above
                  }
                  else
                  {
                     info.parent = uuid;
                     docs.remove(docId);
                     docs.put(info.uuid, info);
                  }
               }
            }
         });


         if (stopRequested)
         {
            return;
         }


         double foreignParents = 0;
         Iterator it = docs.values().iterator();
         while (it.hasNext())
         {
            NodeInfo info = (NodeInfo)it.next();
            NodeInfo parent = (NodeInfo)docs.get(info.parent);
            if (parent != null)
            {
               parents[info.docId] = DocId.create(parent.docId);
            }
            else if (info.parent != null)
            {
               foreignParents++;
               parents[info.docId] = DocId.create(info.parent);
            }
            else if (shareableNodes.get(info.docId))
            {
               Document doc = reader.document(info.docId, FieldSelectors.UUID_AND_PARENT);
               parents[info.docId] = DocId.create(doc.getValues(FieldNames.PARENT));
            }
            else
            {
               // no parent -> root node
               parents[info.docId] = DocId.NULL;
            }
         }
         if (log.isDebugEnabled())
         {
            NumberFormat nf = NumberFormat.getPercentInstance();
            nf.setMaximumFractionDigits(1);
            time = System.currentTimeMillis() - time;
            if (parents.length > 0)
            {
               foreignParents /= parents.length;
            }
            log.debug("initialized {} DocIds in {} ms, {} foreign parents", new Object[]{new Integer(parents.length),
               new Long(time), nf.format(foreignParents)});
         }
      }


      /**
       * Collects term docs for a given start term. All terms with the same
       * field as <code>start</code> are enumerated.
       *
       * @param reader the index reader.
       * @param start the term where to start the term enumeration.
       * @param collector collects the term docs for each term.
       * @throws IOException if an error occurs while reading from the index.
       */
      private void collectTermDocs(IndexReader reader, Term start, TermDocsCollector collector) throws IOException
      {
         TermDocs tDocs = reader.termDocs();
         try
         {
            TermEnum terms = reader.terms(start);
            try
            {
               int count = 0;
               do
               {
                  Term t = terms.term();
                  if (t != null && t.field() == start.field())
                  {
                     tDocs.seek(terms);
                     collector.collect(t, tDocs);
                  }
                  else
                  {
                     break;
                  }
                  // once in a while check if we should quit
                  if (++count % 10000 == 0)
                  {
                     if (stopRequested)
                     {
                        break;
                     }
                  }
               }
               while (terms.next());
            }
            finally
            {
               terms.close();
            }
         }
         finally
         {
            tDocs.close();
         }
      }
   }


   /**
    * Simple interface to collect a term and its term docs. Implementations
    * are passed to the term enumeration of the {@link CacheInitializer}.
    */
   private interface TermDocsCollector
   {


      /**
       * Called for each term encountered. The caller positions
       * <code>tDocs</code> on <code>term</code> before this method is invoked.
       *
       * @param term the term.
       * @param tDocs the term docs of <code>term</code>.
       * @throws IOException if an error occurs while reading from the index.
       */
      void collect(Term term, TermDocs tDocs) throws IOException;
   }


   private final static class NodeInfo
   {


      final int docId;


      final String uuid;


      String parent;


      public NodeInfo(int docId, String uuid)
      {
         this.docId = docId;
         this.uuid = uuid;
      }
   }
}
Source Code of org.exoplatform.services.jcr.impl.core.query.lucene.CachingIndexReader$CacheInitializer

Related Classes of org.exoplatform.services.jcr.impl.core.query.lucene.CachingIndexReader$CacheInitializer