Package com.gentics.cr.lucene.didyoumean

Source Code of com.gentics.cr.lucene.didyoumean.DidYouMeanProvider

package com.gentics.cr.lucene.didyoumean;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;

import org.apache.log4j.Logger;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spell.CustomSpellChecker;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.store.Directory;

import com.gentics.cr.CRConfig;
import com.gentics.cr.CRConfigUtil;
import com.gentics.cr.configuration.GenericConfiguration;
import com.gentics.cr.events.Event;
import com.gentics.cr.events.EventManager;
import com.gentics.cr.events.IEventReceiver;
import com.gentics.cr.lucene.events.IndexingFinishedEvent;
import com.gentics.cr.lucene.indexer.index.LuceneIndexLocation;
import com.gentics.cr.monitoring.MonitorFactory;
import com.gentics.cr.monitoring.UseCase;

/**
* This class can be used to build an autocomplete index over an existing lucene
* index.
*
* Last changed: $Date: 2010-04-01 15:20:21 +0200 (Do, 01 Apr 2010) $
*
* @version $Revision: 528 $
* @author $Author: supnig@constantinopel.at $
*
*/
public class DidYouMeanProvider implements IEventReceiver {

  protected static final Logger log = Logger.getLogger(DidYouMeanProvider.class);

  @Deprecated
  private Directory source = null;

  //@Deprecated
  //private Directory didyoumeanDirectory;

  private LuceneIndexLocation didyoumeanLocation;

  private static final String SOURCE_INDEX_KEY = "srcindexlocation";

  private static final String DIDYOUMEAN_INDEX_KEY = "didyoumeanlocation";

  private static final String DIDYOUMEAN_FIELD_KEY = "didyoumeanfields";

  private static final String DIDYOUMEAN_MIN_DISTANCESCORE = "didyoumeanmindistancescore";

  private static final String DIDYOUMEAN_MIN_DOCFREQ = "didyoumeanmindocfreq";

  /**
   * Configuration key to activate the didyoumean feature for terms that are
   * in the index but have a low result size.
   */
  private static final String DIDYOUMEAN_EXISTINGTERMS_KEY = "didyoumean_forexisitingterms";

  /**
   *
   */
  private static final String DIDYOUMEAN_USE_INDEX_EXTENSION = "didyoumeanUseIndexExtension";

  private String didyoumeanfield = "all";

  private CustomSpellChecker spellchecker = null;

  private boolean all = false;

  /**
   * Mark if we should provide the didyoumean feature for existing terms (with
   * low result count).
   */
  private boolean checkForExistingTerms = false;

  private Collection<String> dym_fields = null;

  private boolean dymreopenupdate = false;

  private static final String UPDATE_ON_REOPEN_KEY = "dymreopenupdate";

  /**
   * flag to indicate if the new DidyoumeanIndexExtension should be used <br>
   * new implementations must set the config key "useDidyomeanIndexExtension"
   * to true to use the extension.
   */
  @Deprecated
  private boolean useDidyomeanIndexExtension = false;

  private Float minDScore = null;
  private Integer minDFreq = null;

  public DidYouMeanProvider(CRConfig config) {

    useDidyomeanIndexExtension = config.getBoolean(DIDYOUMEAN_USE_INDEX_EXTENSION, useDidyomeanIndexExtension);

    if (!useDidyomeanIndexExtension) {
      GenericConfiguration srcConf = (GenericConfiguration) config.get(SOURCE_INDEX_KEY);
      source = LuceneIndexLocation.createDirectory(new CRConfigUtil(srcConf, "SOURCE_INDEX_KEY"));
    }

    GenericConfiguration autoConf = (GenericConfiguration) config.get(DIDYOUMEAN_INDEX_KEY);
    CRConfigUtil dymConfUtil = new CRConfigUtil(autoConf, config.getName() + "." + DIDYOUMEAN_INDEX_KEY);
    didyoumeanLocation = LuceneIndexLocation.getIndexLocation(dymConfUtil);
    if (!useDidyomeanIndexExtension) {
      didyoumeanLocation.registerDirectoriesSpecial();
    }

    checkForExistingTerms = config.getBoolean(DIDYOUMEAN_EXISTINGTERMS_KEY, checkForExistingTerms);

    minDScore = config.getFloat(DIDYOUMEAN_MIN_DISTANCESCORE, (float) 0.0);
    minDFreq = config.getInteger(DIDYOUMEAN_MIN_DOCFREQ, 0);

    didyoumeanfield = config.getString(DIDYOUMEAN_FIELD_KEY, didyoumeanfield);

    // FETCH DYM FIELDS
    if (this.didyoumeanfield.equalsIgnoreCase("ALL")) {
      all = true;
    } else if (this.didyoumeanfield.contains(",")) {
      String[] arr = this.didyoumeanfield.split(",");
      dym_fields = new ArrayList<String>(Arrays.asList(arr));
    } else {
      dym_fields = new ArrayList<String>(1);
      dym_fields.add(this.didyoumeanfield);
    }

    try {
      spellchecker = new CustomSpellChecker(didyoumeanLocation, minDScore, minDFreq);
    } catch (IOException e1) {
      log.error("Could not create didyoumean index.", e1);
      if (spellchecker != null) {
        spellchecker.close();
      }
    }

    if (!useDidyomeanIndexExtension) {
      String sDYMReopenUpdate = config.getString(UPDATE_ON_REOPEN_KEY);
      if (sDYMReopenUpdate != null) {
        dymreopenupdate = Boolean.parseBoolean(sDYMReopenUpdate);
      }

      try {
        reIndex();

      } catch (IOException e) {

      }

      EventManager.getInstance().register(this);
    }
  }

  // @Deprecated
  // public DidYouMeanProvider(CRConfig config)
  // {
  // this(config, false);
  // }

  public void processEvent(Event event) {
    if (IndexingFinishedEvent.INDEXING_FINISHED_EVENT_TYPE.equals(event.getType()) && !useDidyomeanIndexExtension) {
      try {
        reIndex();
      } catch (IOException e) {
        log.error("Could not reindex didyoumean index.", e);
      }
    }
  }

  public CustomSpellChecker getInitializedSpellchecker() {
    return this.spellchecker;
  }

  private long lastupdatestored = 0;

  private void checkForUpdate() {

    if (!useDidyomeanIndexExtension) {
      boolean reopened = false;
      try {
        if (source.fileExists("reopen")) {
          long lastmodified = source.fileModified("reopen");
          if (lastmodified != lastupdatestored) {
            reopened = true;
            lastupdatestored = lastmodified;
          }
        }
        if (reopened) {
          reIndex();
        }
      } catch (IOException e) {
        log.debug("Could not reIndex autocomplete index.", e);
      }
    }
  }

  /**
   *
   * @param termlist
   * @param count
   * @param reader
   * @return
   */
  public Map<String, String[]> getSuggestions(Set<Term> termlist, int count, IndexReader reader) {
    return getSuggestionsStringFromMap(getSuggestionTerms(termlist, count, reader));
  }

  /**
   *
   * @param termlist
   * @param count
   * @param reader
   * @return
   */
  public Map<Term, Term[]> getSuggestionTerms(Set<Term> termlist, int count, IndexReader reader) {

    if (dymreopenupdate) {
      checkForUpdate();
    }
    Map<Term, Term[]> result = new LinkedHashMap<Term, Term[]>();
    Set<Term> termset = new HashSet<Term>();

    if (this.spellchecker != null) {
      for (Term t : termlist) {
        // CHECK IF ALL FIELDS ENABLED FOR SUGGESTIONS OTHERWHISE ONLY
        // ADD TERM IF IT COMES FROM A DYM FIELD
        if (all || dym_fields.contains(t.field())) {
          termset.add(t);
        }
      }
      log.debug("Will use the following fields for dym: " + dym_fields.toString());
      for (Term term : termset) {
        try {
          if (checkForExistingTerms || !this.spellchecker.exist(term.text())) {
            String[] ts = this.spellchecker.suggestSimilar(term.text(), count, reader, term.field(), true);
            if (ts != null && ts.length > 0) {
              Term[] suggestedTerms = new Term[ts.length];
              for (int i = 0; i < ts.length; i++) {
                suggestedTerms[i] = term.createTerm(ts[i]);
              }
              result.put(term, suggestedTerms);
            }
          }
        } catch (IOException ex) {
          log.error("Could not suggest terms", ex);
        }
      }
    } else {
      log.error("Spellchecker has not properly been initialized.");
    }
    return result;
  }

  @Deprecated
  private synchronized void reIndex() throws IOException {
    UseCase ucReIndex = MonitorFactory.startUseCase("reIndex()");
    // build a dictionary (from the spell package)
    log.debug("Starting to reindex didyoumean index.");
    IndexReader sourceReader = IndexReader.open(source);
    Collection<String> fields = null;
    if (all) {
      fields = sourceReader.getFieldNames(IndexReader.FieldOption.ALL);
    } else {
      fields = dym_fields;
    }
    try {
      for (String fieldname : fields) {
        LuceneDictionary dict = new LuceneDictionary(sourceReader, fieldname);
        spellchecker.indexDictionary(dict);
      }
    } finally {
      sourceReader.close();
    }
    log.debug("Finished reindexing didyoumean index.");
    ucReIndex.stop();
  }

  /**
   * Called as soon as the garbage collection detects there is no referencing object left.
   */
  public void finalize() {
    spellchecker.close();
    // stopping the didyoumeanLocation separately should not be needed. just in case.
    didyoumeanLocation.stop();
    EventManager.getInstance().unregister(this);
  }

  /**
   *
   * @param suggestions
   * @return
   */
  public Map<String, String[]> getSuggestionsStringFromMap(Map<Term, Term[]> suggestions) {
    Map<String, String[]> result = new LinkedHashMap<String, String[]>();
    for (Term key : suggestions.keySet()) {
      Term[] values = suggestions.get(key);
      ArrayList<String> valueStrings = new ArrayList<String>(values.length);
      for (Term value : values) {
        valueStrings.add(value.text());
      }
      result.put(key.text(), valueStrings.toArray(new String[valueStrings.size()]));
    }
    return result;
  }

}
TOP

Related Classes of com.gentics.cr.lucene.didyoumean.DidYouMeanProvider

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.