/*
* Copyright (c) 2009 Andrejs Jermakovics.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
*     Andrejs Jermakovics - initial implementation
*/
package it.unibz.instasearch.indexing;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.eclipse.core.resources.IStorage;
import org.eclipse.core.runtime.CoreException;
import org.eclipse.core.runtime.IPath;

/**
*  Indexes documents of type IStorage
*/
public class StorageIndexer
{
  /** Maximum number of terms indexed per one document */
  private static final int MAX_TERMS_PER_DOC = 200000;
  /** Value stored when a field has no value (e.g. a file without an extension or not inside a jar) */
  public static final String NO_VALUE = "<none>";
  /** Minimum length of a word for it to be indexed */
  public static final int MIN_WORD_LENGTH = 1;
  protected static final FileAnalyzer fileAnalyzer = new FileAnalyzer(MIN_WORD_LENGTH);

  private IndexChangeListener changeListener = new NullIndexChangeListener();
 
  private static final Similarity similarity = new LengthNormSimilarity();
  private static final int MAX_RETRY_ATTEMPTS = 10;
  private Directory indexDir;

  /**
   * @throws IOException
   *
   */
  public StorageIndexer() throws IOException
  {
    checkLock();
  }

  private void checkLock() throws IOException
  {
    Directory indexDir = getIndexDir();
   
    if( IndexWriter.isLocked(indexDir) ) // should not be locked at startup, unlock
      IndexWriter.unlock(indexDir);
  }

  public Directory getIndexDir() throws IOException
  {
    if( indexDir == null ) indexDir = new RAMDirectory();

    return indexDir;
  }

  /**
   * @param create true to create a new empty index, false to append to the existing one
   * @return IndexWriter
   * @throws IOException
   */
  public IndexWriter createIndexWriter(boolean create) throws IOException
  {
    IndexWriter indexWriter = new IndexWriter(getIndexDir(), fileAnalyzer, create, MaxFieldLength.UNLIMITED);

    indexWriter.setMergeFactor(2); // use fewer resources (although slower)
    indexWriter.setSimilarity(similarity);
    indexWriter.setMaxFieldLength(MAX_TERMS_PER_DOC);

    return indexWriter;
  }
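
  /*
   * Hypothetical usage sketch for createIndexWriter (illustrative only): create=true
   * wipes any existing index and starts empty, create=false appends to it. The method
   * below just shows the expected open/commit/close cycle.
   */
  private void exampleRecreateEmptyIndex() throws IOException
  {
    IndexWriter writer = createIndexWriter(true); // true: replace any existing index with an empty one
    try
    {
      writer.commit(); // persist the empty index so isIndexed() becomes true
    }
    finally
    {
      writer.close();
    }
  }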

  /**
   * @return true if an index exists in the index directory
   * @throws IOException
   */
  public boolean isIndexed() throws IOException
  {
    return IndexReader.indexExists(getIndexDir());
  }

  /**
   * Check if the index can be read
   *
   * @return whether the index is readable
   */
  public boolean isReadable()
  {
    try {
      IndexReader reader = IndexReader.open(getIndexDir(), true);
      reader.close();

    } catch (IOException readingException) {
      return false;
    }

    return true;
  }

  /**
   * Delete the whole index
   * @throws Exception
   */
  public void deleteIndex() throws Exception {

    RetryingRunnable runnable = new RetryingRunnable()
    { 
      public void run() throws Exception
      {
        IndexWriter w = createIndexWriter(true); // open for writing and close (make empty)
        w.deleteAll();
        w.commit();
        w.close(true);
       
        Directory dir = getIndexDir();
        for(String file: dir.listAll())
        {
          if( dir.fileExists(file) ) // still exists
          {
            dir.sync(file);
            dir.deleteFile(file);
          }
        }
        dir.close();
      }
     
      public boolean handleException(Throwable e)
      {
        return true;
      }
    };
   
    changeListener.onIndexReset(); // close searcher because index is deleted
   
    runRetryingRunnable(runnable); // delete index with retry
  }

  /**
   * @throws Exception
   */
  public void optimizeIndex() throws Exception {
    if( ! isIndexed() )
      return;

    IndexWriter w = createIndexWriter(false);
    w.optimize();
    w.close();

    changeListener.onIndexUpdate();
  }

  /**
   * @param changeListener the changeListener to set
   */
  public void setIndexChangeListener(IndexChangeListener changeListener) {
    this.changeListener = changeListener;
  }

  /**
   * @return the changeListener
   */
  protected IndexChangeListener getIndexChangeListener() {
    return changeListener;
  }

  /**
   *
   * @param indexWriter
   * @param storage
   * @param projectName
   * @param modificationStamp
   * @param jar path to jar file containing this file or null
   * @throws IOException if the storage contents cannot be read or indexing fails
   */
  public void indexStorage(IndexWriter indexWriter, IStorage storage, String projectName,
      long modificationStamp, String jar) throws IOException
  {
    InputStream contents;
    try
    {
      contents = storage.getContents();
    }
    catch (Exception e)
    {
      throw new IOException(e);
    }
    BufferedReader isReader = new BufferedReader(new InputStreamReader(contents));
    IPath fullPath = storage.getFullPath();
    String ext = fullPath.getFileExtension();
    if( ext == null ) ext = NO_VALUE;

    Document doc = new Document();

    doc.add(createLuceneField(Field.CONTENTS,   isReader));
    doc.add(createLuceneField(Field.FILE,     fullPath.toString()));
    doc.add(createLuceneField(Field.PROJ,     projectName));
    doc.add(createLuceneField(Field.NAME,     fullPath.lastSegment()));
    doc.add(createLuceneField(Field.EXT,     ext.toLowerCase(Locale.ENGLISH)));
    doc.add(createLuceneField(Field.MODIFIED,   Long.toString(modificationStamp)));
    doc.add(createLuceneField(Field.JAR,     (jar==null)?NO_VALUE:jar));

    indexWriter.addDocument(doc);
  }
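
  /*
   * Hypothetical usage sketch for indexStorage: workspace files (IFile) implement IStorage,
   * so one can be indexed directly. The project name and modification stamp arguments shown
   * here are assumptions about how a caller would fill the parameters.
   */
  private void exampleIndexWorkspaceFile(IndexWriter indexWriter, org.eclipse.core.resources.IFile file)
      throws IOException
  {
    // jar is null because the file lives in the workspace, not inside an archive
    indexStorage(indexWriter, file, file.getProject().getName(), file.getModificationStamp(), null);
  }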

  private static void runRetryingRunnable(RetryingRunnable runnable) throws Exception
  {
    Throwable lastException = null;
   
    for(int i = 1; i <= MAX_RETRY_ATTEMPTS; i++)
    { 
      try
      {
        runnable.run();
        lastException = null;
        break;
      }
      catch(Throwable e) // exception occurred during run
      {
        lastException = e;
       
        if( ! runnable.handleException(e) )
          break;
      }
     
      try {
        Thread.sleep( i*1000 ); // wait a bit longer each time for files to be freed
      } catch(Exception e) {
        break;
      }
    }
   
    if( lastException != null )
    {
      if( lastException instanceof Exception )
        throw (Exception)lastException;
      else
        throw new Exception(lastException);
    }
  }
 
  /**
   * Makes several attempts to index storage.
   * Occasionally the index files get locked (by other processes) and are temporarily not writable.
   *
   * @param indexWriter
   * @param storage
   * @param projectName
   * @param modificationStamp
   * @param jar
   * @throws Exception if indexing still fails after the retry attempts
   */
  protected void indexStorageWithRetry(final IndexWriter indexWriter, final IStorage storage,
      final String projectName, final long modificationStamp, final String jar) throws Exception
  {
    RetryingRunnable runnable = new RetryingRunnable()
    {
      public void run() throws Exception
      {
        indexStorage(indexWriter, storage, projectName, modificationStamp, jar);
      }
     
      public boolean handleException(Throwable e)
      {
        if( e instanceof OutOfMemoryError )
        {
          if( indexWriter.getMaxFieldLength() > IndexWriter.DEFAULT_MAX_FIELD_LENGTH )
            indexWriter.setMaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH); // use less memory
          else
            return false;
        }
        else if( e instanceof IOException)
        {
          changeListener.onIndexReset(); // close searcher
        }
       
        return true; // keep retrying
      }
    };
   
    runRetryingRunnable(runnable);
  }
 
  public interface RetryingRunnable
  {
    public void run() throws Exception;
   
    /**
     * Called if an exception occurs during run()
     *
     * @param e
     * @return true if should run again, false if stop and re-throw exception
     */
    public boolean handleException(Throwable e);
  }
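
  /*
   * Illustrative RetryingRunnable, assuming the same policy used elsewhere in this class:
   * retry while the index files are briefly locked, give up on anything else.
   */
  private RetryingRunnable exampleOptimizeRunnable()
  {
    return new RetryingRunnable()
    {
      public void run() throws Exception
      {
        optimizeIndex(); // may fail temporarily while another process holds the index files
      }

      public boolean handleException(Throwable e)
      {
        return e instanceof IOException; // retry I/O problems, re-throw everything else
      }
    };
  }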
 
  /**
   * Removes the document of the given storage from the index
   * @param storage
   * @throws Exception
   */
  public void deleteStorage(IStorage storage) throws Exception
  {
    IndexReader reader = IndexReader.open(getIndexDir(), false);

    String filePath = storage.getFullPath().toString();

    Term term = Field.FILE.createTerm(filePath);
    reader.deleteDocuments(term);

    reader.close();
  }


  private static org.apache.lucene.document.Field createLuceneField(Field fieldName, String value) {
    return new org.apache.lucene.document.Field(fieldName.toString(), value,
        Store.YES, org.apache.lucene.document.Field.Index.NOT_ANALYZED);
  }

  private static org.apache.lucene.document.Field createLuceneField(Field fieldName, Reader reader) {
    return new org.apache.lucene.document.Field(fieldName.toString(),
        reader, TermVector.YES);
  }

  /**
   * Extracts terms from text
   *
   * @param text
   * @return a map of terms to their offsets in text
   * @throws IOException
   */
  public static Map<String, List<Integer>> extractTextTerms(String text) throws IOException {
    Map<String, List<Integer>> terms = new HashMap<String, List<Integer>>();
    TokenStream tokenStream = fileAnalyzer.tokenStream(Field.CONTENTS.toString(), new StringReader(text));
   
    TermAttribute termAtt = (TermAttribute) tokenStream.addAttribute(TermAttribute.class);
    OffsetAttribute offsetAtt = (OffsetAttribute) tokenStream.addAttribute(OffsetAttribute.class);
   
    while(tokenStream.incrementToken())
    {
      String termText = termAtt.term().toLowerCase(Locale.ENGLISH);
      int offset = offsetAtt.startOffset();
     
      List<Integer> offsets = terms.get(termText);

      if( offsets == null ) {
        offsets = new LinkedList<Integer>();
        terms.put(termText, offsets);
      }

      offsets.add(offset);
    }
    tokenStream.close();

    return terms;
  }
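
  /*
   * Usage sketch for extractTextTerms, assuming FileAnalyzer tokenizes plain words as-is:
   * the map is keyed by lower-cased term text and each value lists the term's start offsets.
   * The sample text and expected offsets are illustrative assumptions.
   */
  private static void exampleExtractTerms() throws IOException
  {
    Map<String, List<Integer>> terms = extractTextTerms("foo bar foo");
    List<Integer> fooOffsets = terms.get("foo"); // expected to hold offsets 0 and 8
    System.out.println("foo occurs at offsets " + fooOffsets);
  }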

  /**
   * Listener that gets called when index has changed
   */
  public interface IndexChangeListener
  {
    /** Index was updated with files or files were removed */
    public void onIndexUpdate();

    /** Index was reset - created or deleted */
    public void onIndexReset();
  }
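
  /*
   * Sketch of registering a listener via setIndexChangeListener. The reactions in the
   * comments are assumptions about what a typical caller (e.g. one holding a cached
   * searcher) would do in response to the two callbacks.
   */
  private void exampleRegisterListener()
  {
    setIndexChangeListener(new IndexChangeListener()
    {
      public void onIndexUpdate()
      {
        // e.g. reopen a cached IndexSearcher so newly indexed documents become visible
      }

      public void onIndexReset()
      {
        // e.g. discard the cached searcher entirely; the index was created or deleted
      }
    });
  }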

  /** Empty implementation to avoid null checks (Null Object pattern) */
  private static class NullIndexChangeListener implements IndexChangeListener
  {
    public void onIndexUpdate() {}
    public void onIndexReset() {}
  }

}