// Package com.gentics.cr.lucene.indexer.index
//
// Source Code of com.gentics.cr.lucene.indexer.index.LuceneAnalyzerFactory

package com.gentics.cr.lucene.indexer.index;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;

import com.gentics.cr.CRConfigFileLoader;
import com.gentics.cr.configuration.GenericConfiguration;
import com.gentics.cr.lucene.LuceneVersion;
import com.gentics.cr.lucene.analysis.ReverseAnalyzer;
import com.gentics.cr.lucene.indexer.IndexerUtil;

/**
* TODO javadoc.
* Last changed: $Date: 2009-07-10 10:49:03 +0200 (Fr, 10 Jul 2009) $
* @version $Revision: 131 $
* @author $Author: supnig@constantinopel.at $
*
*/
public final class LuceneAnalyzerFactory {

  /**
   * Private constructor.
   */
  private LuceneAnalyzerFactory() {
  }

  /**
   * Log4j Logger for error and debug messages.
   */
  protected static final Logger LOGGER = Logger.getLogger(LuceneAnalyzerFactory.class);

  /**
   * Stop word config key.
   */
  private static final String STOP_WORD_FILE_KEY = "STOPWORDFILE";

  /**
   * Analyzer config key.
   */
  private static final String ANALYZER_CONFIG_KEY = "ANALYZERCONFIG";

  /**
   * Analyzer class key.
   */
  private static final String ANALYZER_CLASS_KEY = "ANALYZERCLASS";

  /**
   * Field name.
   */
  private static final String FIELD_NAME_KEY = "FIELDNAME";

  /**
   * Reveres attributes key.
   */
  private static final String REVERSE_ATTRIBUTES_KEY = "REVERSEATTRIBUTES";

  /**
   *   Reverse Attribute suffix.
   */
  public static final String REVERSE_ATTRIBUTE_SUFFIX = "_REVERSE";

  /**
   * This Map stores the same information as the PerFieldAnalyzerWrapper,
   * makes the used Analyzer class names (canonical names) per field accessible.
   * filled in the createAnalyzer method
   */
  private static Map<String, String> configuredAnalyzerMap = new HashMap<String, String>();

  /**
   * TODO javadoc.
   * @param config TODO javadoc
   * @return TODO javadoc
   */
  public static List<String> getReverseAttributes(final GenericConfiguration config) {
    GenericConfiguration analyzerConfig = loadAnalyzerConfig(config);
    if (analyzerConfig != null) {
      String reverseAttributeString = (String) analyzerConfig.get(REVERSE_ATTRIBUTES_KEY);
      return IndexerUtil.getListFromString(reverseAttributeString, ",");
    }
    return null;
  }

  /**
   * Creates an analyzer from the given config.
   * @param config TODO javadoc
   * @return TODO javadoc
   */
  public static Analyzer createAnalyzer(final GenericConfiguration config) {
    // Caching the analyzer instances is not possible as those do not implement Serializable
    // TODO: cache the config (imho caching should be implemented in the config itself)

    PerFieldAnalyzerWrapper analyzerWrapper = new PerFieldAnalyzerWrapper(createDefaultAnalyzer(config));
    configuredAnalyzerMap.clear();

    //Load analyzer config
    GenericConfiguration analyzerConfig = loadAnalyzerConfig(config);
    if (analyzerConfig != null) {
      ArrayList<String> addedReverseAttributes = new ArrayList<String>();
      List<String> reverseAttributes = getReverseAttributes(config);
      Map<String, GenericConfiguration> subconfigs = analyzerConfig.getSortedSubconfigs();
      if (subconfigs != null) {
        for (Map.Entry<String, GenericConfiguration> entry : subconfigs.entrySet()) {
          GenericConfiguration analyzerconfig = entry.getValue();
          String fieldname = analyzerconfig.getString(FIELD_NAME_KEY);
          String analyzerclass = analyzerconfig.getString(ANALYZER_CLASS_KEY);

          Analyzer analyzerInstance = createAnalyzer(analyzerclass, analyzerconfig);
          analyzerWrapper.addAnalyzer(fieldname, analyzerInstance);
          configuredAnalyzerMap.put(fieldname, analyzerInstance.getClass().getCanonicalName());

          //ADD REVERSE ANALYZERS
          if (reverseAttributes != null && reverseAttributes.contains(fieldname)) {
            addedReverseAttributes.add(fieldname);

            ReverseAnalyzer reverseAnalyzer = new ReverseAnalyzer(analyzerInstance);
            analyzerWrapper.addAnalyzer(fieldname + REVERSE_ATTRIBUTE_SUFFIX, reverseAnalyzer);

            configuredAnalyzerMap.put(fieldname + REVERSE_ATTRIBUTE_SUFFIX, reverseAnalyzer.getClass()
                .getCanonicalName());
          }
        }
      }
      //ADD ALL NON CONFIGURED REVERSE ANALYZERS
      if (reverseAttributes != null && reverseAttributes.size() > 0) {
        for (String att : reverseAttributes) {
          if (!addedReverseAttributes.contains(att)) {
            ReverseAnalyzer reverseAnalyzer = new ReverseAnalyzer(null);
            analyzerWrapper.addAnalyzer(att + REVERSE_ATTRIBUTE_SUFFIX, reverseAnalyzer);
            configuredAnalyzerMap.put(att + REVERSE_ATTRIBUTE_SUFFIX, reverseAnalyzer.getClass()
                .getCanonicalName());
          }
        }
      }
    }
    return analyzerWrapper;
  }

  /**
   * TODO javadoc.
   * @param config TODO javadoc
   * @return TODO javadoc
   */
  private static GenericConfiguration loadAnalyzerConfig(final GenericConfiguration config) {
    if (config.hasSubConfig(ANALYZER_CONFIG_KEY)) {
      return config.getSubConfig(ANALYZER_CONFIG_KEY);
    } else {
      GenericConfiguration analyzerConfig = null;
      String confpath = config.getString(ANALYZER_CONFIG_KEY);
      if (confpath != null) {
        analyzerConfig = new GenericConfiguration();
        try {
          CRConfigFileLoader.loadConfiguration(analyzerConfig, confpath, null);
        } catch (IOException e) {
          LOGGER.error("Could not load analyzer configuration from " + confpath, e);
        }
      }
      return analyzerConfig;
    }
  }

  /**
   * TODO javadoc.
   * @param analyzerclass TODO javadoc
   * @param config TODO javadoc
   * @return TODO javadoc
   */
  private static Analyzer createAnalyzer(final String analyzerclass, final GenericConfiguration config) {
    Analyzer a = null;
    try {
      //First try to create an Analyzer that takes a config object
      a = (Analyzer) Class.forName(analyzerclass).getConstructor(new Class[] { GenericConfiguration.class })
          .newInstance(config);
    } catch (Exception e1) {
      try {
        //IF FIRST FAILS TRY SIMPLE CONSTRUCTOR
        a = (Analyzer) Class.forName(analyzerclass).getConstructor().newInstance();
      } catch (Exception e2) {
        //IF SIMPLE FAILS, PROBABLY DID NOT FIND CONSTRUCTOR,
        //TRYING WITH VERSION ADDED
        try {
          a = (Analyzer) Class.forName(analyzerclass).getConstructor(new Class[] { Version.class })
              .newInstance(LuceneVersion.getVersion());
        } catch (Exception e3) {
          LOGGER.error("Could not instantiate Analyzer with class " + analyzerclass
              + ". Do you use some special" + " Analyzer? Or do you need to use a Wrapper?", e3);
        }
      }
    }
    return a;
  }

  /**
   * TODO javadoc.
   * @param config TODO javadoc
   * @return TODO javadoc
   */
  private static Analyzer createDefaultAnalyzer(final GenericConfiguration config) {
    //Update/add Documents
    Analyzer analyzer;
    File stopWordFile = IndexerUtil.getFileFromPath((String) config.get(STOP_WORD_FILE_KEY));
    if (stopWordFile != null) {
      //initialize Analyzer with stop words
      try {
        analyzer = new StandardAnalyzer(LuceneVersion.getVersion(), stopWordFile);
        return analyzer;
      } catch (IOException ex) {
        LOGGER.error("Could not open stop words file. " + "Will create standard " + "analyzer.", ex);
      }
    }

    analyzer = new StandardAnalyzer(LuceneVersion.getVersion(), CharArraySet.EMPTY_SET);
    return analyzer;
  }

  /**
   * Return a map of all used analyzers (per field).
   * This method calls createAnalyzer(config) so it is quite expensive.
   * The config parameter is needed for the call to createAnalyzer as this method
   * reads the analyzer configuration everytime!
   * Key: fieldname
   * Value: canonical class name
   * @param config needed for listing all analyzers.
   * @return Map of analyzers per field.
   */
  public static Map<String, String> getConfiguredAnalyzers(final GenericConfiguration config) {
    createAnalyzer(config);
    return configuredAnalyzerMap;
  }
}
// TOP
//
// Related Classes of com.gentics.cr.lucene.indexer.index.LuceneAnalyzerFactory
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.