/*
* Copyright (c) 2009 Andrejs Jermakovics.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
*     Andrejs Jermakovics - initial implementation
*/
package it.unibz.instasearch.indexing;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.eclipse.core.resources.IStorage;
import org.eclipse.core.runtime.CoreException;
import org.eclipse.core.runtime.IPath;

/**
*  Indexes documents of type IStorage
*/
public class StorageIndexer
{
  /** Maximum number of terms indexed per one document */
  private static final int MAX_TERMS_PER_DOC = 200000;
  /** Value stored when a field has no value (e.g. a file without an extension or not inside a jar) */
  public static final String NO_VALUE = "<none>";
  /** Minimum length of a word for it to be indexed */
  public static final int MIN_WORD_LENGTH = 1;
  protected static final FileAnalyzer fileAnalyzer = new FileAnalyzer(MIN_WORD_LENGTH);

  private IndexChangeListener changeListener = new NullIndexChangeListener();
 
  private static final Similarity similarity = new LengthNormSimilarity();
  private static final int MAX_RETRY_ATTEMPTS = 10;
  private Directory indexDir;

  /**
   * @throws IOException
   *
   */
  public StorageIndexer() throws IOException
  {
    checkLock();
  }

  private void checkLock() throws IOException
  {
    Directory indexDir = getIndexDir();
   
    if( IndexWriter.isLocked(indexDir) ) // should not be locked at startup, unlock
      IndexWriter.unlock(indexDir);
  }

  public Directory getIndexDir() throws IOException
  {
    if( indexDir == null ) indexDir = new RAMDirectory();

    return indexDir;
  }

  /**
   * @param create true to create a new empty index, false to append to the existing one
   * @return IndexWriter
   * @throws IOException
   */
  public IndexWriter createIndexWriter(boolean create) throws IOException
  {
    IndexWriter indexWriter = new IndexWriter(getIndexDir(), fileAnalyzer, create, MaxFieldLength.UNLIMITED);

    indexWriter.setMergeFactor(2); // use fewer resources (although slower)
    indexWriter.setSimilarity(similarity);
    indexWriter.setMaxFieldLength(MAX_TERMS_PER_DOC);

    return indexWriter;
  }
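
  /*
   * Hypothetical usage sketch for createIndexWriter (illustrative only): create=true
   * wipes any existing index and starts empty, create=false appends to it. The method
   * below just shows the expected open/commit/close cycle.
   */
  private void exampleRecreateEmptyIndex() throws IOException
  {
    IndexWriter writer = createIndexWriter(true); // true: replace any existing index with an empty one
    try
    {
      writer.commit(); // persist the empty index so isIndexed() becomes true
    }
    finally
    {
      writer.close();
    }
  }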

  /**
   * @return true if an index exists in the index directory
   * @throws IOException
   */
  public boolean isIndexed() throws IOException
  {
    return IndexReader.indexExists(getIndexDir());
  }

  /**
   * Check if the index can be read
   *
   * @return whether the index is readable
   */
  public boolean isReadable()
  {
    try {
      IndexReader reader = IndexReader.open(getIndexDir(), true);
      reader.close();

    } catch (IOException readingException) {
      return false;
    }

    return true;
  }

  /**
   * Delete the whole index
   * @throws Exception
   */
  public void deleteIndex() throws Exception {

    RetryingRunnable runnable = new RetryingRunnable()
    { 
      public void run() throws Exception
      {
        IndexWriter w = createIndexWriter(true); // open for writing and close (make empty)
        w.deleteAll();
        w.commit();
        w.close(true);
       
        Directory dir = getIndexDir();
        for(String file: dir.listAll())
        {
          if( dir.fileExists(file) ) // still exists
          {
            dir.sync(file);
            dir.deleteFile(file);
          }
        }
        dir.close();
      }
     
      public boolean handleException(Throwable e)
      {
        return true;
      }
    };
   
    changeListener.onIndexReset(); // close searcher because index is deleted
   
    runRetryingRunnable(runnable); // delete index with retry
  }

  /**
   * @throws Exception
   */
  public void optimizeIndex() throws Exception {
    if( ! isIndexed() )
      return;

    IndexWriter w = createIndexWriter(false);
    w.optimize();
    w.close();

    changeListener.onIndexUpdate();
  }

  /**
   * @param changeListener the changeListener to set
   */
  public void setIndexChangeListener(IndexChangeListener changeListener) {
    this.changeListener = changeListener;
  }

  /**
   * @return the changeListener
   */
  protected IndexChangeListener getIndexChangeListener() {
    return changeListener;
  }

  /**
   *
   * @param indexWriter
   * @param storage
   * @param projectName
   * @param modificationStamp
   * @param jar path to jar file containing this file or null
   * @throws IOException if the storage contents cannot be read or indexing fails
   */
  public void indexStorage(IndexWriter indexWriter, IStorage storage, String projectName,
      long modificationStamp, String jar) throws IOException
  {
    InputStream contents;
    try
    {
      contents = storage.getContents();
    }
    catch (Exception e)
    {
      throw new IOException(e);
    }
    BufferedReader isReader = new BufferedReader(new InputStreamReader(contents));
    IPath fullPath = storage.getFullPath();
    String ext = fullPath.getFileExtension();
    if( ext == null ) ext = NO_VALUE;

    Document doc = new Document();

    doc.add(createLuceneField(Field.CONTENTS,   isReader));
    doc.add(createLuceneField(Field.FILE,     fullPath.toString()));
    doc.add(createLuceneField(Field.PROJ,     projectName));
    doc.add(createLuceneField(Field.NAME,     fullPath.lastSegment()));
    doc.add(createLuceneField(Field.EXT,     ext.toLowerCase(Locale.ENGLISH)));
    doc.add(createLuceneField(Field.MODIFIED,   Long.toString(modificationStamp)));
    doc.add(createLuceneField(Field.JAR,     (jar==null)?NO_VALUE:jar));

    indexWriter.addDocument(doc);
  }
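
  /*
   * Hypothetical usage sketch for indexStorage: workspace files (IFile) implement IStorage,
   * so one can be indexed directly. The project name and modification stamp arguments shown
   * here are assumptions about how a caller would fill the parameters.
   */
  private void exampleIndexWorkspaceFile(IndexWriter indexWriter, org.eclipse.core.resources.IFile file)
      throws IOException
  {
    // jar is null because the file lives in the workspace, not inside an archive
    indexStorage(indexWriter, file, file.getProject().getName(), file.getModificationStamp(), null);
  }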

  private static void runRetryingRunnable(RetryingRunnable runnable) throws Exception
  {
    Throwable lastException = null;
   
    for(int i = 1; i <= MAX_RETRY_ATTEMPTS; i++)
    { 
      try
      {
        runnable.run();
        lastException = null;
        break;
      }
      catch(Throwable e) // exception occurred during run
      {
        lastException = e;
       
        if( ! runnable.handleException(e) )
          break;
      }
     
      try {
        Thread.sleep( i*1000 ); // wait a bit longer each time for files to be freed
      } catch(Exception e) {
        break;
      }
    }
   
    if( lastException != null )
    {
      if( lastException instanceof Exception )
        throw (Exception)lastException;
      else
        throw new Exception(lastException);
    }
  }
 
  /**
   * Makes several attempts to index storage.
   * Occasionally the index files get locked (by other processes) and are temporarily not writable.
   *
   * @param indexWriter
   * @param storage
   * @param projectName
   * @param modificationStamp
   * @param jar
   * @throws Exception if indexing still fails after the retry attempts
   */
  protected void indexStorageWithRetry(final IndexWriter indexWriter, final IStorage storage,
      final String projectName, final long modificationStamp, final String jar) throws Exception
  {
    RetryingRunnable runnable = new RetryingRunnable()
    {
      public void run() throws Exception
      {
        indexStorage(indexWriter, storage, projectName, modificationStamp, jar);
      }
     
      public boolean handleException(Throwable e)
      {
        if( e instanceof OutOfMemoryError )
        {
          if( indexWriter.getMaxFieldLength() > IndexWriter.DEFAULT_MAX_FIELD_LENGTH )
            indexWriter.setMaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH); // use less memory
          else
            return false;
        }
        else if( e instanceof IOException)
        {
          changeListener.onIndexReset(); // close searcher
        }
       
        return true; // keep retrying
      }
    };
   
    runRetryingRunnable(runnable);
  }
 
  public interface RetryingRunnable
  {
    public void run() throws Exception;
   
    /**
     * Called if an exception occurs during run()
     *
     * @param e
     * @return true if should run again, false if stop and re-throw exception
     */
    public boolean handleException(Throwable e);
  }
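
  /*
   * Illustrative RetryingRunnable, assuming the same policy used elsewhere in this class:
   * retry while the index files are briefly locked, give up on anything else.
   */
  private RetryingRunnable exampleOptimizeRunnable()
  {
    return new RetryingRunnable()
    {
      public void run() throws Exception
      {
        optimizeIndex(); // may fail temporarily while another process holds the index files
      }

      public boolean handleException(Throwable e)
      {
        return e instanceof IOException; // retry I/O problems, re-throw everything else
      }
    };
  }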
 
  /**
   * Removes the document of the given storage from the index
   * @param storage
   * @throws Exception
   */
  public void deleteStorage(IStorage storage) throws Exception
  {
    IndexReader reader = IndexReader.open(getIndexDir(), false);

    String filePath = storage.getFullPath().toString();

    Term term = Field.FILE.createTerm(filePath);
    reader.deleteDocuments(term);

    reader.close();
  }


  private static org.apache.lucene.document.Field createLuceneField(Field fieldName, String value) {
    return new org.apache.lucene.document.Field(fieldName.toString(), value,
        Store.YES, org.apache.lucene.document.Field.Index.NOT_ANALYZED);
  }

  private static org.apache.lucene.document.Field createLuceneField(Field fieldName, Reader reader) {
    return new org.apache.lucene.document.Field(fieldName.toString(),
        reader, TermVector.YES);
  }

  /**
   * Extracts terms from text
   *
   * @param text
   * @return a map of terms to their offsets in text
   * @throws IOException
   */
  public static Map<String, List<Integer>> extractTextTerms(String text) throws IOException {
    Map<String, List<Integer>> terms = new HashMap<String, List<Integer>>();
    TokenStream tokenStream = fileAnalyzer.tokenStream(Field.CONTENTS.toString(), new StringReader(text));
   
    TermAttribute termAtt = (TermAttribute) tokenStream.addAttribute(TermAttribute.class);
    OffsetAttribute offsetAtt = (OffsetAttribute) tokenStream.addAttribute(OffsetAttribute.class);
   
    while(tokenStream.incrementToken())
    {
      String termText = termAtt.term().toLowerCase(Locale.ENGLISH);
      int offset = offsetAtt.startOffset();
     
      List<Integer> offsets = terms.get(termText);

      if( offsets == null ) {
        offsets = new LinkedList<Integer>();
        terms.put(termText, offsets);
      }

      offsets.add(offset);
    }
    tokenStream.close();

    return terms;
  }
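
  /*
   * Usage sketch for extractTextTerms, assuming FileAnalyzer tokenizes plain words as-is:
   * the map is keyed by lower-cased term text and each value lists the term's start offsets.
   * The sample text and expected offsets are illustrative assumptions.
   */
  private static void exampleExtractTerms() throws IOException
  {
    Map<String, List<Integer>> terms = extractTextTerms("foo bar foo");
    List<Integer> fooOffsets = terms.get("foo"); // expected to hold offsets 0 and 8
    System.out.println("foo occurs at offsets " + fooOffsets);
  }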

  /**
   * Listener that gets called when index has changed
   */
  public interface IndexChangeListener
  {
    /** Index was updated with files or files were removed */
    public void onIndexUpdate();

    /** Index was reset - created or deleted */
    public void onIndexReset();
  }
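
  /*
   * Sketch of registering a listener via setIndexChangeListener. The reactions in the
   * comments are assumptions about what a typical caller (e.g. one holding a cached
   * searcher) would do in response to the two callbacks.
   */
  private void exampleRegisterListener()
  {
    setIndexChangeListener(new IndexChangeListener()
    {
      public void onIndexUpdate()
      {
        // e.g. reopen a cached IndexSearcher so newly indexed documents become visible
      }

      public void onIndexReset()
      {
        // e.g. discard the cached searcher entirely; the index was created or deleted
      }
    });
  }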

  /** Empty implementation to avoid null checks (Null Object pattern) */
  private static class NullIndexChangeListener implements IndexChangeListener
  {
    public void onIndexUpdate() {}
    public void onIndexReset() {}
  }

}