Package com.gentics.cr.lucene.search.query

Source Code of com.gentics.cr.lucene.search.query.SynonymQueryParser

package com.gentics.cr.lucene.search.query;

import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;

import com.gentics.cr.CRConfigUtil;
import com.gentics.cr.CRRequest;
import com.gentics.cr.configuration.GenericConfiguration;
import com.gentics.cr.lucene.indexaccessor.IndexAccessor;
import com.gentics.cr.lucene.indexer.index.LuceneIndexLocation;


/**
* The SynonymQueryParser change the users Query,
* if there are any synonyms for his searchTerm.
*
* @author Patrick Höfer <p.hoefer@gentics.com>
*/
public class SynonymQueryParser extends CRQueryParser {

  /**
   * maximum count of fetched Synonyms.
   */
  private static final int MAX_SYNONYMS = 20;

  /**
   *
   * GenericConfiguration object of the factory.
   */
  private GenericConfiguration config;

  /**
   * attributes which are searched in query.
   */
  private String[] searchedAttributes;
 
  /**
   * static log4j {@link Logger} to log errors and debug.
   */ 
  private static Logger log = Logger.getLogger(SynonymQueryParser.class);
 
  /**
   * sub Query Parser, which is used as "super QueryParser".
   */
  private QueryParser childQueryParser;

  /**
   * Constructor.
   *
   * @param pconfig Generic Configuration object
   * @param version Version
   * @param searchedAttributes All searched Attributes
   * @param analyzer Analyzer
   * @param crRequest CRRequest
   */
  public SynonymQueryParser(final GenericConfiguration pconfig, Version version,
      final String[] searchedAttributes, Analyzer analyzer, CRRequest crRequest) {
    super(version, searchedAttributes, analyzer, crRequest);
    this.config = pconfig;
    this.searchedAttributes = searchedAttributes;
    //get SubqueryParser if available
    this.childQueryParser = CRQueryParserFactory.getConfiguredParser(searchedAttributes, analyzer, crRequest, new CRConfigUtil(pconfig, "Subconfig"));
  }

  /**
   * parse the query for lucene.
   *
   * @param query as {@link String}
   * @return parsed lucene query
   * @throws ParseException when the query cannot be successfully parsed
   */
  public final Query parse(final String query) throws ParseException {
    String crQuery = query;

    crQuery = replaceBooleanMnoGoSearchQuery(crQuery);
    if (getAttributesToSearchIn().size() > getOne()) {
      crQuery = addMultipleSearchedAttributes(crQuery);
    }
    crQuery = addWildcardsForWordmatchParameter(crQuery);
    crQuery = replaceSpecialCharactersFromQuery(crQuery);

    Query resultQuery = childQueryParser.parse(crQuery);
   
    try {
      resultQuery = childQueryParser.parse(includeSynonyms(crQuery));
    } catch (IOException e) {
      log.debug("Error while adding synonyms to query.", e);
    }
   
    return resultQuery;
   
  }
 
  /**
   * look for synonyms in specified Synonymlocation.
   * add the synonyms to search query
   *
   * @param query the search query, before the synonyms are added
   * @return searchQuery as String, with added synonyms
   * @throws IOException when theres a problem with accessing the Index
   */
  public final String includeSynonyms(String query) throws IOException {
   
    GenericConfiguration autoConf = (GenericConfiguration) config.get("synonymlocation");
    LuceneIndexLocation synonymLocation = LuceneIndexLocation
        .getIndexLocation(new CRConfigUtil(autoConf, "synonymlocation"));
   
   
    IndexAccessor ia = synonymLocation.getAccessor();
    Searcher synonymSearcher = ia.getPrioritizedSearcher();
    IndexReader synonymReader = ia.getReader(false);
   
   
    try {
      HashSet<String> searchedTerms = new HashSet<String>();
     
      //get all searched Terms out of query
      for (int i = 0; i < searchedAttributes.length; i++) {
        String subquery = query;
        while (subquery.indexOf(searchedAttributes[i] + ":") > 0) {
          subquery = subquery.substring(subquery.indexOf(searchedAttributes[i] + ":") + searchedAttributes[i].length() + 1);
          int substringUntil = -1;
          int pos1 = subquery.indexOf(")");
          int pos2 = subquery.indexOf(" ");
          if (pos1 != -1) {
            substringUntil = pos1;
          }
          if (pos2 != -1) {
            substringUntil = pos2;
          }
          if (pos1 != -1 && pos2 != -1) {
            if (pos1 <= pos2) {
              substringUntil = pos1;
            }
            else {
              substringUntil = pos2;
            }
          }
          if (substringUntil == -1) {
            substringUntil = subquery.length();
          }
          String addtoSet = subquery.substring(0, substringUntil).replaceAll("\\*", "").replaceAll("\\(", "").replaceAll("\\)", "");
          searchedTerms.add(addtoSet);
          subquery = subquery.substring(substringUntil);
        }
       
      }
     
      //create the query-String for synonym-Index with all searchedTerms
      Iterator<String> it = searchedTerms.iterator();
      String queryString = "";
      while (it.hasNext()) {
        queryString = queryString + "Deskriptor:" + it.next() + " ";
      }
      Query querySynonym;
      try {
        querySynonym = super.parse(queryString);
      } catch (ParseException e) {
        e.printStackTrace();
        log.debug("Error while parsing query for accessing the synonym Index.", e);
        return query;
      }
     
      //get all Synonyms from SynonymIndex and add them to searchQuery
      log.debug("Synonym Query String: " + querySynonym.toString());
      TopDocs docs = synonymSearcher.search(querySynonym, MAX_SYNONYMS);
      log.debug("total found synonyms: " + docs.totalHits)
      for (ScoreDoc doc : docs.scoreDocs) {
        Document d = synonymReader.document(doc.doc);
        for (int i = 0; i < searchedAttributes.length; i++) {
          query = query + " OR " + searchedAttributes[i] + ":" + d.get("Synonym");
        }
      }
    } finally {
      ia.release(synonymSearcher);
      ia.release(synonymReader, false);
    }

    return query;
  }

}
TOP

Related Classes of com.gentics.cr.lucene.search.query.SynonymQueryParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.