Source Code of com.gentics.cr.lucene.indexer.index.LuceneIndexUpdateChecker

package com.gentics.cr.lucene.indexer.index;


import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Set;
import java.util.Vector;


import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.store.Directory;


import com.gentics.api.lib.resolving.Resolvable;
import com.gentics.cr.lucene.indexaccessor.IndexAccessor;
import com.gentics.cr.monitoring.MonitorFactory;
import com.gentics.cr.monitoring.UseCase;
import com.gentics.cr.util.indexing.IndexUpdateChecker;


/**
 * Lucene Implementation of IndexUpdateChecker.
 * Walks an Index and compares Identifyer/Timestamp pairs to the Objects in the Index
 * 
 * Last changed: $Date: 2009-09-02 17:57:48 +0200 (Mi, 02 Sep 2009) $
 * @version $Revision: 180 $
 * @author $Author: supnig@constantinopel.at $
 */
public class LuceneIndexUpdateChecker extends IndexUpdateChecker {


  LuceneIndexLocation indexLocation;
  IndexAccessor indexAccessor;
  LinkedHashMap<String, Integer> docs;
  Iterator<String> docIT;
  Vector<String> checkedDocuments;
  private static final Logger log = Logger.getLogger(LuceneIndexUpdateChecker.class);


  /**
   * Initializes the Lucene Implementation of {@link IndexUpdateChecker}.
   * @param indexLocation
   * @param termKey - Key under wich the termValue is stored in the Index e.g. CRID
   * @param termValue - Value wich to use for iteration e.g. CRID_1
   * @param idAttribute - ID-Attribute key that will be used for Identifyer
   * comparison. This has to represent the field where the identifyer in the
   * method {@link com.gentics.cr.lucene.indexer.index.LuceneIndexUpdateChecker#checkUpToDate(String, Object, String, Resolvable)} 
   * is present.
   * @throws IOException 
   */
  public LuceneIndexUpdateChecker(final LuceneIndexLocation indexLocation, final String termKey, final String termValue,
    final String idAttribute) {
    this.indexLocation = indexLocation;
    indexAccessor = indexLocation.getAccessor();
    IndexReader reader = null;
    try {
      reader = indexAccessor.getReader(true);


      TermDocs termDocs = reader.termDocs(new Term(termKey, termValue));
      log.debug("Fetching sorted documents from index...");
      docs = fetchSortedDocs(termDocs, reader, idAttribute);
      log.debug("Fetched sorted docs from index");
      docIT = docs.keySet().iterator();


      checkedDocuments = new Vector<String>(100);


      //TODO CONTINUE HERE PREPARE TO USE ITERATOR IN CHECK METHOD
    } catch (Throwable e) {
      log.error("Error while retrieving termdocs. Next step: close down connection in finally block", e);
    } finally {
      if (indexAccessor != null && reader != null) {
        log.debug("Closing down indexreader with write permissions (LuceneIndexUpdateChecker instantiation failed)");
        indexAccessor.release(reader, true);
      }
    }
  }


  @Override
  protected final boolean checkUpToDate(final String identifyer, final Object timestamp, final String timestampattribute,
      final Resolvable object) {
    String timestampString;
    if (timestamp == null) {
      return false;
    } else {
      timestampString = timestamp.toString();
    }
    if ("".equals(timestampString)) {
      return false;
    }


    boolean readerWithWritePermissions = false;
    if (docs.containsKey(identifyer)) {


      Integer documentId = docs.get(identifyer);
      IndexReader reader = null;
      try {
        reader = indexAccessor.getReader(readerWithWritePermissions);
        Document document = reader.document(documentId);
        checkedDocuments.add(identifyer);
        Object documentUpdateTimestamp = null;
        try {
          documentUpdateTimestamp = document.get(timestampattribute);
        } catch (NumberFormatException e) {
          log.debug("Got an error getting the document for " + identifyer + " from index", e);
        }
        indexAccessor.release(reader, readerWithWritePermissions);
        //Use strings to compare the attributes
        if (documentUpdateTimestamp != null && !(documentUpdateTimestamp instanceof String)) {
          documentUpdateTimestamp = documentUpdateTimestamp.toString();
        }
        if (documentUpdateTimestamp == null || !documentUpdateTimestamp.equals(timestampString)) {
          log.debug(identifyer + ": object is not up to date.");
          return false;
        }
        log.debug(identifyer + ": object is up to date.");
        return true;
      } catch (IOException e) {
        //TODO specify witch index is not readable
        String directories = "";
        Directory[] dirs = indexLocation.getDirectories();
        for (Directory dir : dirs) {
          directories += dir.toString() + '\n';
        }
        log.error("Cannot open index for reading. (Directory: " + directories + ")", e);
        return true;
      } finally {
        if (indexAccessor != null) {
          indexAccessor.release(reader, readerWithWritePermissions);
          log.debug("Released reader with write permission: " + readerWithWritePermissions + " at thread: "
              + Thread.currentThread().getName() + " - threadid: " + Thread.currentThread().getId());
        }
      }
    } else {
      //object is not yet in the index => it is not up to date
      return false;
    }
  }


  @Override
  public void deleteStaleObjects() {
    log.debug(checkedDocuments.size() + " objects checked, " + docs.size() + " objects already in the index.");
    IndexReader writeReader = null;
    boolean readerNeedsWrite = true;
    UseCase deleteStale = MonitorFactory.startUseCase("LuceneIndexUpdateChecker.deleteStaleObjects(" + indexLocation.getName() + ")");
    try {
      boolean objectsDeleted = false;
      for (String contentId : docs.keySet()) {
        if (!checkedDocuments.contains(contentId)) {
          log.debug("Object " + contentId + " wasn't checked in the last run. So i will delete it.");
          if (writeReader == null) {
            writeReader = indexAccessor.getReader(readerNeedsWrite);
          }
          writeReader.deleteDocument(docs.get(contentId));
          objectsDeleted = true;
        }
      }
      if (objectsDeleted) {
        indexLocation.createReopenFile();
      }
    } catch (IOException e) {
      log.error("Cannot delete objects from index.", e);
    } finally {
      //always release writeReader it blocks other threads if you don't 
      if (writeReader != null) {
        indexAccessor.release(writeReader, readerNeedsWrite);
      }
      log.debug("Finished cleaning stale documents");
      deleteStale.stop();
    }
    checkedDocuments.clear();
  }


  private LinkedHashMap<String, Integer> fetchSortedDocs(TermDocs termDocs, IndexReader reader, String idAttribute) throws IOException {
    LinkedHashMap<String, Integer> tmp = new LinkedHashMap<String, Integer>();


    while (termDocs.next()) {
      Document doc = reader.document(termDocs.doc());
      String docID = doc.get(idAttribute);
      tmp.put(docID, termDocs.doc());
    }


    LinkedHashMap<String, Integer> ret = new LinkedHashMap<String, Integer>(tmp.size());
    Vector<String> v = new Vector<String>(tmp.keySet());
    Collections.sort(v);
    for (String id : v) {
      ret.put(id, tmp.get(id));
    }
    return ret;
  }


}
Source Code of com.gentics.cr.lucene.indexer.index.LuceneIndexUpdateChecker

Related Classes of com.gentics.cr.lucene.indexer.index.LuceneIndexUpdateChecker