// Package it.unibz.instasearch.ui
//
// Source Code of it.unibz.instasearch.ui.ResultContentProvider$MatchFindCallback

/*
* Copyright (c) 2009 Andrejs Jermakovics.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
*     Andrejs Jermakovics - initial implementation
*/
package it.unibz.instasearch.ui;

import it.unibz.instasearch.InstaSearch;
import it.unibz.instasearch.InstaSearchPlugin;
import it.unibz.instasearch.indexing.Field;
import it.unibz.instasearch.indexing.SearchQuery;
import it.unibz.instasearch.indexing.SearchResult;
import it.unibz.instasearch.indexing.SearchResultDoc;
import it.unibz.instasearch.indexing.Searcher;
import it.unibz.instasearch.indexing.StorageIndexer;
import it.unibz.instasearch.indexing.WorkspaceIndexer;
import it.unibz.instasearch.prefs.PreferenceConstants;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import org.eclipse.core.resources.IFile;
import org.eclipse.core.resources.IStorage;
import org.eclipse.jface.viewers.ITreeContentProvider;
import org.eclipse.jface.viewers.Viewer;
import org.eclipse.search.ui.text.Match;
import org.eclipse.ui.IEditorInput;

class ResultContentProvider implements ITreeContentProvider {
 
  // Placeholder elements handed to the viewer in place of real results
  private static final String[] NO_INDEX_MESSAGE = new String[] {"Index is not built"};
  private static final String[] NO_FILE_MESSAGE = new String[] {"File missing"};
  private static final String NO_RESULTS_MESSAGE = "0 matches";
 
  public static final double MAX_LINE_SIMILARITY = 0.9; // if above that, only one of the similar lines is kept
  public static final int MAX_LINES_TO_PROCESS = 5000; // max nr of lines to read from files (prevent slowdown on huge files)
 
  private WorkspaceIndexer indexer;
  private Searcher searcher;
 
  /**
   * Query of the most recent search; reset to null by inputChanged when the new input is not a SearchQuery
   */
  private SearchQuery currentSearchQuery;
  /**
   * Map of search terms to their boost score
   */
  private Map<String, Float> searchTerms;
 
  /** Elements produced by the last getElements(Object) call (result docs plus action entries), or null */
  private Object[] cachedResults;
  /** Result doc whose children were computed last by getChildren */
  private SearchResultDoc cachedResultDoc;
  /** Children computed for cachedResultDoc */
  private Object[] cachedChildren;
  /** Number of result docs found by the last search */
  private int resultCount;
 
 
  public ResultContentProvider() {
    InstaSearch instaSearch = InstaSearchPlugin.getInstaSearch();
   
    this.indexer = instaSearch.getIndexer();
    this.searcher = instaSearch.getSearcher();
  }
 
  public void inputChanged(Viewer v, Object oldInput, Object newSearch) {
    if( newSearch==null || !(newSearch instanceof SearchQuery) ) {
      currentSearchQuery = null; // clear cache
      cachedResults = null;
   
  }
 
  /**
   * Intentionally empty: no clean-up is performed when this provider is disposed.
   */
  public void dispose() { 
  }
 
  public Object[] getElements(Object searchQueryObj)
  { 
    if( searchQueryObj==null || !(searchQueryObj instanceof SearchQuery) )
      return Collections.EMPTY_LIST.toArray(); // not searching
   
    SearchQuery searchQuery = (SearchQuery) searchQueryObj;
    String searchString = searchQuery.getSearchString();
   
    if( searchString==null || searchString.toString().length() < Searcher.MIN_QUERY_LENGTH )
      return Collections.EMPTY_LIST.toArray();
   
    try
    {
      if( ! indexer.isIndexed() )
        return NO_INDEX_MESSAGE;
    } catch(Exception e)
    {
      InstaSearchPlugin.log(e);
      return new Exception[]{e};
    }
   
    if( searchQuery.equals(currentSearchQuery) && cachedResults != null ) // same query
      return cachedResults;
   
    SearchResult result = null;
    cachedResults = null;
    resultCount = 0;
    Object[] resultArray = null;
   
    if(searchString != null)
    {
      try {
        result = searcher.search(searchQuery); // do the search
        currentSearchQuery = searchQuery;
       
        if( result == null ) {
          if( !searchQuery.isFuzzy() ) {
            SearchQuery newQuery = new SearchQuery(searchQuery);
            newQuery.setExact(false);
            newQuery.setFuzzy(true);
            resultArray = new Object[]{NO_RESULTS_MESSAGE, newQuery}; // add fuzzy query
          } else {
            resultArray = new Object[]{NO_RESULTS_MESSAGE};
          }
         
          cachedResults = resultArray;
         
          return resultArray;
        }
       
        searchTerms = result.getSearchTerms();
       
      } catch (Exception e) {
        InstaSearchPlugin.log(e);
        return new Exception[]{e};
      }
    }
   
    List<SearchResultDoc> resultDocs = result.getResultDocs();
    this.resultCount = resultDocs.size();
   
    boolean addMoreResults = false, addFindSimilar = false;
   
    if( searchQuery.isLimited() && result.isFull() ) { // if only showing limited number of matches
      addMoreResults = true;
    }
    else
    {
      if( searchQuery.isExact() && !searchQuery.isFuzzy() ) // if query is exact, can try search for individual tokens
        addFindSimilar = true;
    }
   
    if( addMoreResults || addFindSimilar )
      resultArray = new Object[resultCount + 1]// +1 because we append "More..." element (a SearchQuery object)
    else
      resultArray = new Object[resultCount];
   
    resultDocs.toArray(resultArray);
   
    if( addMoreResults ) { // if more results, create More result entry (return all elements)
      SearchQuery moreResultsQuery = new SearchQuery(currentSearchQuery);
      moreResultsQuery.setMaxResults(SearchQuery.UNLIMITED_RESULTS); // all results
      resultArray[resultCount] = moreResultsQuery;
    }
    else if( addFindSimilar )
    {
      SearchQuery findSimilarQuery = new SearchQuery(currentSearchQuery);
      findSimilarQuery.setExact(false);
      resultArray[resultCount] = findSimilarQuery;
    }
   
    cachedResults = resultArray;
   
    return resultArray;
  }
 
  /**
   * Returns the number of search result docs found by the last search.
   * Action entries appended to the result elements (e.g. the "More..." query)
   * are not included in this count.
   *
   * @return result count of the last search
   */
  public int getResultCount()
  {
    return resultCount;
  }
 
  /**
   * Returns last search result elements.
   * Includes search result docs and additional action entries (eg More results)
   *
   * @return  array of last search result elements; null when no results are
   *          currently cached (the cache is cleared when the input changes or a
   *          new search starts)
   */
  public Object[] getElements() {
    return cachedResults;
  }
 
  public Object[] getChildren(Object parent)
  {
    if( parent instanceof SearchResultDoc ) {
     
      SearchResultDoc doc = (SearchResultDoc) parent;
     
      if( cachedResultDoc != null && doc.equals(cachedResultDoc) )
        return cachedChildren; // cache results
     
      List<MatchLine> matches = null;
      Object[] children = null;
     
      try {
        matches = getMatchLines(doc, true, null);
        if( matches != null )
          children = matches.toArray();
      } catch (Exception e) {
        InstaSearchPlugin.log(e);
      }
     
      if( matches == null )
        children = NO_FILE_MESSAGE;
     
      cachedResultDoc = doc;
      cachedChildren = children;
     
      return children;
    }
    else if( parent instanceof Exception ) {
      Exception e = (Exception) parent;
      return e.getStackTrace();
    }
   
    return Collections.EMPTY_LIST.toArray();
  }
 
  /**
   * Callback used by getMatchLines to report matched lines as they are found
   * and to let the caller abort the scan.
   */
  public interface MatchFindCallback
  {
    /** Invoked for every line that is added to the list of matched lines. */
    void matchFound(MatchLine line);
    /** Polled before each line is processed; returning true stops reading the file. */
    boolean isCanceled();
  }
 
  /**
   * Returns matched lines
   * @param doc
   * @param limit
   * @return
   * @throws Exception
   */
  List<MatchLine> getMatchLines(SearchResultDoc doc, boolean limit, MatchFindCallback callback) throws Exception {
   
    if( searchTerms == null || currentSearchQuery == null )
      return Collections.emptyList();
   
    int maxMatches = InstaSearchPlugin.getIntPref(PreferenceConstants.P_SHOWN_LINES_COUNT);
    List<MatchLine> matchedLines = new ArrayList<MatchLine>();
    int matchCount = doc.getMatchCount();
    String searchString = currentSearchQuery.getSearchString().toLowerCase(Locale.ENGLISH);
   
    IStorage f = getStorage(doc);
    if( f == null ) {
      // index might be outdated (disabled updating)
      //TODO: remove file from index (update index)
      return null;
    }
   
    InputStream fileInputStream = null;
   
    if( f instanceof IFile ) {
      IFile file = (IFile) f;
      if( !file.exists() )
        return null;
      fileInputStream = file.getContents(true);
    } else {
      fileInputStream = f.getContents();
    }
   
    LineNumberReader lineReader = new LineNumberReader(new InputStreamReader(fileInputStream)); // is a buffered reader
   
    String line;
   
    // Read through file one line at a time
    while ( (line = lineReader.readLine()) != null ) {
     
      if( callback != null && callback.isCanceled() ) break;
      //if( currentSearchQuery.isCanceled() ) break;
     
      if( "".equals(line) ) continue;
     
      Map<String, List<Integer>> lineTerms = StorageIndexer.extractTextTerms(line);
      if( lineTerms.isEmpty() ) continue;
     
      HashSet<String> matchedTerms = new HashSet<String>(searchTerms.keySet()); // search terms that appear on this line
      matchedTerms.retainAll(lineTerms.keySet());
     
      if( matchedTerms.isEmpty() && matchCount != 0 && limit ) // if have matches in general, but not on this line, then skip
        continue;
     
      float[] lineTermScoreVector = doc.getTermScoreVector(lineTerms.keySet());
      float[] matchedTermScoreVector = doc.getTermScoreVector(matchedTerms);
     
      MatchLine matchLine = new MatchLine(doc, line, lineReader.getLineNumber(), matchedTerms, lineTermScoreVector, matchedTermScoreVector);
      matchedLines.add(matchLine);
     
      addMatches(matchLine, lineTerms, matchedTerms, searchString);
      if( callback != null )
        callback.matchFound(matchLine);
     
      if( lineReader.getLineNumber() > MAX_LINES_TO_PROCESS )
        break;
     
      //TODO: break if all current matches have high score (eg >0.9)
    }
   
    lineReader.close();
   
    if(limit && matchedLines.size() > maxMatches) { 
      matchedLines = getTopMatchLines(maxMatches, matchedLines); // return TOP N lines
      return matchedLines;
     
    } else
      return matchedLines;
  }

  private List<MatchLine> getTopMatchLines(int maxMatchLines, List<MatchLine> matchedLines) {
    Collections.sort(matchedLines); // sort by match count, score, line
    removeSimilarLines(matchedLines, maxMatchLines);
    matchedLines = matchedLines.subList(0, maxMatchLines); // top N results
   
    Collections.sort(matchedLines, new Comparator<MatchLine>() { // sort by line number for display
      public int compare(MatchLine l1, MatchLine l2) {
        return l1.getLineNumber() - l2.getLineNumber();
      }
    });
   
    return matchedLines;
  }

  /**
   * Find matches on the line
   *
   * @param matchLine
   * @param terms
   * @param matchedTerms
   * @param searchString
   * @return
   */
  private float addMatches(MatchLine matchLine, Map<String, List<Integer>> terms,
      Set<String> matchedTerms, String searchString) {
   
    String lcaseLine = matchLine.getLine().toLowerCase(Locale.ENGLISH);
   
    if( !matchedTerms.contains(searchString) && !currentSearchQuery.isFuzzy() ) { // check for exact match on the line
     
      int pos = lcaseLine.indexOf(searchString);
     
      while( pos != -1 ) {
        Match m = new Match(searchString, pos, searchString.length());
        matchLine.add(m, true);
        pos = lcaseLine.indexOf(searchString, pos + searchString.length() - 1);
      }
    }
   
    float matchedTermBoost = 0;
   
    for(String term: matchedTerms) {             
      List<Integer> offsets = terms.get(term);
     
      for(int offset: offsets) {
        int pos = lcaseLine.indexOf(term, offset);
        if( pos == -1 ) continue;
        Match m = new Match(term, pos, term.length());
        matchLine.add(m);
      }
     
      float boost = searchTerms.get(term);
     
      matchedTermBoost += boost;
    }
   
    matchLine.setMatchedTermBoost(matchedTermBoost);
   
    return matchedTermBoost;
  }
 
  /**
   * Removes similar lines from line matches.
   * Even if they are high scored, we don not want to see the same lines again
   * Line similarity is based on Cosine between their corresponding term vectors
   *
   * @param matchedLines
   * @param maxMatches
   */
  private void removeSimilarLines(List<MatchLine> matchedLines, int maxMatches)
  {
    MatchLine curMatchLine = null;
   
    int lineNr = 0;
    for (Iterator<MatchLine> iterator = matchedLines.iterator();
          iterator.hasNext() && matchedLines.size()>maxMatches; )
    {
      MatchLine matchLine = iterator.next();
     
      if( curMatchLine == null ) {
        curMatchLine = matchLine;
        lineNr++;
        continue;
      }
     
      double similarity = getLineSimilarity(curMatchLine, matchLine);
     
      if( similarity > MAX_LINE_SIMILARITY )
        iterator.remove(); // since lines are sorted by score, lowest score line will be removed
      else {
        curMatchLine = matchLine;
        lineNr++; 
      }
     
      if( lineNr == maxMatches )
        break;
    }
  }

  /**
   * Calculates similarity based on the Cosine angle between score vectors of each line.
   *
   * @param lineMatches1
   * @param lineMatches2
   * @return
   */
  private double getLineSimilarity(MatchLine lineMatches1, MatchLine lineMatches2)
  {
    float[] vect1 = lineMatches1.getScoreVector();
    float[] vect2 = lineMatches2.getScoreVector();
   
    double dotProduct = 0.0;
    double magnitude1 = 0.0;
    double magnitude2 = 0.0;
    for (int i = 0; i < vect1.length ; i++) {
        double val1 = vect1[i];
        double val2 = vect2[i];
        magnitude1 += val1 * val1;
        magnitude2 += val2 * val2;
        dotProduct += val1 * val2;
    }
    magnitude1 = Math.sqrt(magnitude1);
    magnitude2 = Math.sqrt(magnitude2);
    return (magnitude1 == 0 || magnitude2 == 0)
        ? 0
        : dotProduct / (magnitude1 * magnitude2);
   
  }

  /**
   * Parent lookup is not provided by this content provider.
   *
   * @param element any tree element
   * @return always null
   */
  public Object getParent(Object element) {
    return null;
  }
 
  public boolean hasChildren(Object element) {
    return ( element instanceof SearchResultDoc ) || ( element instanceof SearchQuery );
  }
 
  public Collection<String> getSearchTerms() {
    return searchTerms.keySet();
  }

  /**
   * Obtains the editor input for a result document by delegating to the indexer.
   *
   * @param doc the result document
   * @return whatever the indexer returns for the document
   * @throws Exception propagated from the indexer
   */
  public IEditorInput getEditorInput(SearchResultDoc doc) throws Exception {
    return indexer.getEditorInput(doc);
  }
 
  /**
   * Obtains the storage of a result document by delegating to the indexer.
   *
   * @param doc the result document
   * @return whatever the indexer returns for the document; getMatchLines
   *         treats a null result as a missing file
   * @throws Exception propagated from the indexer
   */
  public IStorage getStorage(SearchResultDoc doc) throws Exception {
    return indexer.getStorage(doc);
  }
 
  public List<String> getProposals(String prefix, Field field) throws IOException
  {
    List<String> ucaseProposals = searcher.getProposals(prefix.toUpperCase(), field);
   
    if( prefix.toUpperCase().equals(prefix.toLowerCase(Locale.ENGLISH)))
      return ucaseProposals;
   
    List<String> lcaseProposals = searcher.getProposals(prefix.toLowerCase(Locale.ENGLISH), field);
   
    ucaseProposals.addAll(lcaseProposals);
    Collections.sort(ucaseProposals, String.CASE_INSENSITIVE_ORDER);
   
    return ucaseProposals;
  }
 
  /**
   * A class representing a line in a document and containing some keyword matches
   */
  class MatchLine implements Comparable<MatchLine> {
   
    private String lineText;
    private List<Match> matches = new LinkedList<Match>();
    private SearchResultDoc doc;
    private int lineNumber;
 
    private double termScore;
    private double matchedTermScore;
    private float[] scoreVector;
    private float matchedTermBoost;
    private int exactMatches;
    private int matchedTermCount;
   
    private MatchLine(SearchResultDoc doc, String lineText, int lineNumber, Set<String> matchedTerms, float[] termScoreVector, float[] matchedTermScoreVector) throws IOException {
      this.doc = doc;
      this.lineText = lineText;
      this.lineNumber = lineNumber;
      this.scoreVector = termScoreVector;
     
      termScore = getMagnitude(termScoreVector);
      matchedTermScore = getMagnitude(matchedTermScoreVector);
      matchedTermCount = matchedTerms.size();
    }

    public void setMatchedTermBoost(float matchedTermBoost)
    {
      this.matchedTermBoost = matchedTermBoost;
    }

    public float getMatchedTermBoost()
    {
      return matchedTermBoost;
    }
   
    public void add(Match m) {
      matches.add(m);
    }
   
    public void add(Match m, boolean isExactMatch) {
      matches.add(m);
      if( isExactMatch )
        this.exactMatches++;
    }
   
    public List<Match> getMatches() {
      return matches;
    }

    public String getLine() {
      return lineText;
    }

    public int getLineNumber() {
      return lineNumber;
    }
       
    public double getTermScore()
    {
      return termScore;
    }
   
    public float[] getScoreVector()
    {
      return scoreVector;
    }
   
    public double getMatchedTermScore()
    {
      return matchedTermScore;
    }
   
    public int compareTo(MatchLine lineMatches) { // to sort by match count and then by line number
      int diff = lineMatches.exactMatches - exactMatches;
      if( diff == 0 )
        diff = lineMatches.matchedTermCount - matchedTermCount;
      if( diff == 0 )
        Double.compare(lineMatches.matchedTermBoost, matchedTermBoost);
      if(diff == 0)
        diff = Double.compare(lineMatches.termScore, termScore);
      if(diff == 0)
        return getLineNumber() - lineMatches.lineNumber; // smaller to bigger
      return diff;
    }
   
    public SearchResultDoc getResultDoc()
    {
      return doc;
    }
 
    /**
     * Vector magnitude
     *
     * @param vect
     * @return
     */
    private double getMagnitude(float[] vect)
    {
      double magnitude = 0;
     
      for(float value: vect)
        magnitude+=value*value;
     
      magnitude = Math.sqrt(magnitude);
     
      return magnitude;
    }

    @Override
    public String toString()
    {
      return "Line " + lineNumber + ": (" + matchedTermCount + ")" + lineText;
    }
  }
}
// TOP
//
// Related Classes of it.unibz.instasearch.ui.ResultContentProvider$MatchFindCallback
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.