// Package it.unibz.instasearch.indexing
//
// Source code of it.unibz.instasearch.indexing.WorkspaceIndexer

/*
* Copyright (c) 2009 Andrejs Jermakovics.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
*     Andrejs Jermakovics - initial implementation
*/
package it.unibz.instasearch.indexing;

import it.unibz.instasearch.InstaSearchPlugin;
import it.unibz.instasearch.prefs.PreferenceConstants;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.eclipse.core.resources.IContainer;
import org.eclipse.core.resources.IFile;
import org.eclipse.core.resources.IFolder;
import org.eclipse.core.resources.IProject;
import org.eclipse.core.resources.IResource;
import org.eclipse.core.resources.IStorage;
import org.eclipse.core.resources.IWorkspaceRoot;
import org.eclipse.core.runtime.CoreException;
import org.eclipse.core.runtime.IProgressMonitor;
import org.eclipse.core.runtime.Platform;
import org.eclipse.core.runtime.content.IContentType;
import org.eclipse.core.runtime.content.IContentTypeManager;
import org.eclipse.core.runtime.jobs.ISchedulingRule;
import org.eclipse.jface.util.IPropertyChangeListener;
import org.eclipse.jface.util.PropertyChangeEvent;
import org.eclipse.ui.IEditorInput;
import org.eclipse.ui.ide.IDE;
import org.eclipse.ui.part.FileEditorInput;

/**
* WorkspaceIndexer
* Indexes the Eclipse workspace
*/
public class WorkspaceIndexer extends StorageIndexer implements ISchedulingRule, IPropertyChangeListener {
   
  // Content type used to recognise text files; assigned in the constructor and
  // left null when the platform has no content type manager.
  private IContentType TEXT_CONTENT_TYPE;
 
  // Visitor shared by all instances; it is cleared and re-run before every traversal.
  private static final ResourceCollector resourceCollector = new ResourceCollector();
 
  // Prefs
  // Sorted, lower-cased extensions to index; null means every extension is indexable.
  private String fileExtensions[] = getIndexableFileExtensions();
  // Compiled exclusion patterns handed to the resource collector before each traversal.
  private List<Pattern> excludedDirRegExes = getExcludedDirsRegExes();
  // Result returned for files without an extension when an extension list is configured.
  private boolean indexEmptyExtension = InstaSearchPlugin.getBoolPref(PreferenceConstants.P_INDEX_EMPTY_EXTENSION);
 
  /**
   * @throws Exception
   */
  public WorkspaceIndexer() throws Exception 
  {
      super();
     
      if( Platform.getContentTypeManager() != null )
        TEXT_CONTENT_TYPE = Platform.getContentTypeManager().getContentType(IContentTypeManager.CT_TEXT);
  }
 
  @Override
  public Directory getIndexDir() throws IOException
  {
    return FSDirectory.open(getIndexDirLocation()); // FSDirectory.getDirectory(getIndexDirLocation(), false);
  }
 
  /**
   * @param monitor
   * @throws Exception
   */
  public void createIndex(IWorkspaceRoot root, IProgressMonitor monitor) throws Exception {
   
    getIndexChangeListener().onIndexReset();
   
    deleteIndex();
    Directory indexDirectory = FSDirectory.open(getIndexDirLocation());
   
    IndexWriter indexWriter = createIndexWriter(true);
   
    indexContainers(indexWriter, root, monitor);
   
    monitor.setTaskName("Optimizing Index");
    indexWriter.optimize();
   
    indexWriter.close();
    indexDirectory.close();
   
    getIndexChangeListener().onIndexUpdate();
   
    monitor.done();
  }
 
 
  /**
   * Index all containers in the workspace
   *
   * @param indexWriter
   * @param workspaceRoot
   * @param monitor
   * @throws Exception
   */
  protected void indexContainers(IndexWriter indexWriter, IWorkspaceRoot workspaceRoot, IProgressMonitor monitor) throws Exception {
   
    resourceCollector.clear();
    resourceCollector.setExcludedDirRegExes( excludedDirRegExes );
    workspaceRoot.accept(resourceCollector); // use visitor to collect containers
   
    monitor.beginTask("File Indexing", resourceCollector.getContainers().size());
   
    for(IContainer container: resourceCollector.getContainers())
    { 
      if( monitor.isCanceled() )
        break;
     
      monitor.setTaskName("Indexing: " + container.getProject().getName() + " - " + container.getName());
      indexContainer(indexWriter, container, monitor);
      monitor.worked(1);
    }
   
    monitor.done();
  }
 
  private File getIndexDirLocation()
  {
    File location = InstaSearchPlugin.getIndexDirLocation();
   
    if( ! location.exists() )
      location.mkdirs();
   
    return location;
  }
 
  @Override
  public boolean isIndexed() throws IOException
  {
    return IndexReader.indexExists(getIndexDir()) && super.isIndexed();
  }
 
  private static List<Pattern> getExcludedDirsRegExes()
  {
    String excludeDirList = InstaSearchPlugin.getDefault().getPreferenceStore().getString(PreferenceConstants.P_EXCLUDE_DIRS);
   
    if( excludeDirList == null || "".equals(excludeDirList) )
      return Collections.emptyList();
   
    List<Pattern> excludeDirSet = new ArrayList<Pattern>();
    String[] excludedDirArr = excludeDirList.split(File.pathSeparator);
   
    for (String wildCardPattern : excludedDirArr)
    {
      Pattern pattern = null;
      if( wildCardPattern.startsWith("/") ) wildCardPattern = wildCardPattern.substring(1);
     
      try {
        pattern = wildcardToRegex(wildCardPattern);
        excludeDirSet.add(pattern);
      } catch(Throwable t) {
        InstaSearchPlugin.debug(t);
      }
    }
   
    return excludeDirSet;
  }
 
  private static String[] getIndexableFileExtensions() {
   
    String extensionList = InstaSearchPlugin.getDefault().getPreferenceStore().getString(PreferenceConstants.P_INDEXABLE_EXTENSIONS);
    if( extensionList == null || "".equals(extensionList) || "*".equals(extensionList) ) return null;
   
    extensionList = extensionList.replace(" ", "");
   
    String[] extensions = extensionList.split("[,|;:]");
    if( extensions.length == 0 ) return null;
   
    for(int i = 0; i<extensions.length; i++) {
      String ext = extensions[i].toLowerCase(Locale.ENGLISH).trim();
      if( ext.startsWith("*") ) ext = ext.substring(1);
      if( ext.startsWith(".") ) ext = ext.substring(1);
      extensions[i] = ext;
    }
   
    Arrays.sort(extensions);
   
    return extensions;
  }
 
  /**
   * @param file
   * @return isIndexable
   * @throws CoreException
   */
  public boolean isIndexable(IFile file) throws CoreException {
   
    String ext = file.getFileExtension();
   
    return isIndexableExtension(ext) || isTextFile(file);
  }

  /**
   * @param ext
   * @return isIndexableExtension
   */
  public boolean isIndexableExtension(String ext) {
   
    if( fileExtensions == null || fileExtensions.length == 0 )
      return true; // all files indexable
   
    if( ext == null || "".equals(ext) )
      return indexEmptyExtension;
   
    if( Arrays.binarySearch(fileExtensions, ext.toLowerCase(Locale.ENGLISH)) >= 0 )
      return true;
   
    return false;
  }
 
  private void indexFile(IndexWriter indexWriter, IFile file) throws Exception {
   
    if( ! file.isAccessible() )
      return;
   
    if( file.isDerived(IResource.CHECK_ANCESTORS) )
      return;
   
    if( ! file.isSynchronized(IResource.DEPTH_ZERO) )
      return;
   
    if( file.getRawLocation() == null ) // unknown location
      return;
   
    File f = file.getRawLocation().toFile();
    if( f == null || ! f.canRead() )
      return;
   
    indexStorageWithRetry(indexWriter, file, file.getProject().getName(), file.getLocalTimeStamp(), null);
  }
 
  /**
   * @param file
   * @throws Exception
   */
  public void updateFile(IFile file) throws Exception {
   
    if( !isIndexed() )
      return;
   
    deleteStorage(file);
   
    if( !isIndexable(file) )
      return;
   
    if( file.isAccessible() && !file.isDerived(IResource.CHECK_ANCESTORS) )
    {
      IndexWriter w = createIndexWriter(false);
      indexFile(w, file);
      w.close();
    }

  }
 
  /**
   * Deletes and re-indexes files in a folder
   *
   * @param folder
   * @param monitor
   * @throws Exception
   */
  public void updateFolder(IFolder folder, IProgressMonitor monitor) throws Exception {
   
    if( !isIndexed() )
      return;
   
    IndexReader reader = IndexReader.open(getIndexDir(), false);
    deleteFolder(reader, folder);
    reader.close();
   
    if( !folder.isAccessible() ) 
      return;
   
    resourceCollector.clear();
    resourceCollector.setExcludedDirRegExes(excludedDirRegExes);
    folder.accept(resourceCollector); // get also subfolders
   
    IndexWriter w = createIndexWriter(false);
   
    for(IContainer container: resourceCollector.getContainers())
    {
      if( isExcluded(container) ) continue;
      indexContainer(w, container, monitor);
    }
   
    w.close();
  }

  /**
   * @param container
   * @return
   */
  private boolean isExcluded(IContainer container) {
   
    if( container == null || excludedDirRegExes == null || excludedDirRegExes.isEmpty() )
      return false;
   
    return ResourceCollector.isResourceExcluded(container, excludedDirRegExes)
        || isExcluded(container.getParent());
  }

  /**
   * @param project
   * @param monitor
   * @throws Exception
   */
  public void updateProject(IProject project, IProgressMonitor monitor) throws Exception {
   
    if( !isIndexed() )
      return;
   
    deleteProject(project);
   
    if( project.exists() && project.isAccessible() && project.isOpen() ) {
      IndexWriter w = createIndexWriter(false);
     
      resourceCollector.clear();
      resourceCollector.setExcludedDirRegExes(excludedDirRegExes);
      project.accept(resourceCollector);
     
      for(IContainer container: resourceCollector.getContainers())
      {
        indexContainer(w, container, monitor);
      }
     
      w.close();
    }
   
  }
 
  /**
   * @param w
   * @param container
   * @param monitor
   * @throws Exception
   */
  private void indexContainer(IndexWriter w, IContainer container, IProgressMonitor monitor) throws Exception
  {
    if( !container.isAccessible() || container.isDerived(IResource.CHECK_ANCESTORS) )
      return;
   
    IResource[] members = container.members(false);
   
    for(IResource member: members) {
     
      if( monitor.isCanceled() )
        return;
     
      if( member.getType() != IResource.FILE || !member.isAccessible() || member.isDerived() )
        continue;
     
      IFile file = (IFile) member;
     
      if( isIndexable(file) )
        indexFile(w, file);
    }
  }
 
  /**
   * @param file
   * @return
   * @throws CoreException
   */
  private boolean isTextFile(IFile file) throws CoreException {
   
    if( !file.isSynchronized(IResource.DEPTH_ZERO) )
      return false;
   
    IContentType contentType = IDE.getContentType(file);
    if( contentType == null ) contentType = IDE.guessContentType(file);
    if( contentType == null ) return false;
   
    if( TEXT_CONTENT_TYPE != null && contentType.isKindOf(TEXT_CONTENT_TYPE) )
      return true;
   
    return false;
  }

  /**
   * @throws IOException
   *
   */
  private void deleteFolder(IndexReader reader, IContainer container) throws Exception {
   
    IndexSearcher searcher = new IndexSearcher(reader);
    String path = container.getFullPath().addTrailingSeparator().toString();
    TopDocs topDocs = searcher.search(new PrefixQuery(Field.FILE.createTerm(path)), reader.numDocs());
   
    for(ScoreDoc doc: topDocs.scoreDocs)
    {
      int docNum = doc.doc;
      reader.deleteDocument(docNum);
    }
   
    searcher.close();
  }
 
  /**
   * @param project
   * @return deletedCount
   * @throws Exception
   */
  public int deleteProject(IProject project) throws Exception {
    IndexReader reader = IndexReader.open(getIndexDir(), false);
    String filePath = project.getFullPath().toString();
   
    Term term = Field.PROJ.createTerm(filePath);
    int deletedCount = reader.deleteDocuments(term);
   
    reader.close();
   
    return deletedCount;
  }
 
  public boolean isConflicting(ISchedulingRule rule)
  {
    return rule == this// prevent concurrent writing of the index from jobs
  }

  public boolean contains(ISchedulingRule rule)
  {
    return rule == this;
  }

  /**
   * @param doc
   * @return IEditorInput
   * @throws Exception
   * @throws IOException
   */
  public IEditorInput getEditorInput(SearchResultDoc doc) throws Exception {
   
    return new FileEditorInput(doc.getFile());
  }

  /**
   * @param doc
   * @return IStorage
   * @throws Exception
   * @throws IOException
   */
  public IStorage getStorage(SearchResultDoc doc) throws Exception {
    return doc.getFile();
  }
 
  public void propertyChange(PropertyChangeEvent event) {
    String prop = event.getProperty();
   
    if( PreferenceConstants.P_INDEXABLE_EXTENSIONS.equals(prop) )
      fileExtensions = getIndexableFileExtensions();
    else if( PreferenceConstants.P_EXCLUDE_DIRS.equals(prop) )
      excludedDirRegExes = getExcludedDirsRegExes()
    else if( PreferenceConstants.P_INDEX_EMPTY_EXTENSION.equals(prop) )
      indexEmptyExtension = InstaSearchPlugin.getBoolPref(PreferenceConstants.P_INDEX_EMPTY_EXTENSION);
   
  }
 
  /**
   * Convert path matching wildcard pattern to regular expression.
   *
   *
   * @param pathWildcardPattern
   * @return regex pattern
   */
  public static Pattern wildcardToRegex(String pathWildcardPattern)
  {
    String regex = pathWildcardPattern;
   
    regex = regex.replaceAll("\\*\\*", "<double-star>"); // escape initially
    regex = regex.replaceAll("\\.", "\\."); // escape .
    regex = regex.replaceAll("\\*", "[^/]*");
    regex = regex.replaceAll("\\?", ".");
    regex = regex.replaceAll("<double-star>", ".*");
   
    Pattern pattern = Pattern.compile(regex);
   
    return pattern;
  }
}
// TOP
//
// Related classes of it.unibz.instasearch.indexing.WorkspaceIndexer
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.