Package org.sindice.siren.solr.schema

Source Code of org.sindice.siren.solr.schema.AnalyzerConfigReader

/**
* Copyright 2014 National University of Ireland, Galway.
*
* This file is part of the SIREn project. Project and contact information:
*
*  https://github.com/rdelbru/SIREn
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*  http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.sindice.siren.solr.schema;

import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.Map;

import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.Version;
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.common.SolrException;
import org.apache.solr.core.Config;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.schema.FieldTypePluginLoader;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.util.DOMUtil;
import org.apache.solr.util.plugin.AbstractPluginLoader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
* Read a datatype's analyzer configuration.
* <p>
* Code taken from {@link FieldTypePluginLoader} and adapted for SIREn's
* use case.
*/
public class AnalyzerConfigReader {

  public static final String LUCENE_MATCH_VERSION_PARAM = IndexSchema.LUCENE_MATCH_VERSION_PARAM;

  private static final
  Logger logger = LoggerFactory.getLogger(AnalyzerConfigReader.class);

  /**
   * Read an analyzer definition and instantiate an {@link Analyzer} object.
   *
   * <p> Code taken from {@link IndexSchema#readAnalyzer()}
   *
   * @param node An analyzer node from the config file
   * @return An analyzer
   * @throws XPathExpressionException If an XPath expression cannot be evaluated
   */
  protected static Analyzer readAnalyzer(final Node node,
                                         final SolrResourceLoader loader,
                                         final Version luceneMatchVersion)
  throws XPathExpressionException {
    if (node == null) return null;
    final NamedNodeMap attrs = node.getAttributes();

    final String analyzerName = DOMUtil.getAttr(attrs, "class");

    // check for all of these up front, so we can error if used in
    // conjunction with an explicit analyzer class.
    final XPath xpath = XPathFactory.newInstance().newXPath();
    final NodeList charFilterNodes = (NodeList) xpath.evaluate
      ("./charFilter",  node, XPathConstants.NODESET);
    final NodeList tokenizerNodes = (NodeList) xpath.evaluate
      ("./tokenizer", node, XPathConstants.NODESET);
    final NodeList tokenFilterNodes = (NodeList) xpath.evaluate
      ("./filter", node, XPathConstants.NODESET);

    if (analyzerName != null) {

      // explicitly check for child analysis factories instead of
      // just any child nodes, because the user might have their
      // own custom nodes (ie: <description> or something like that)
      if (0 != charFilterNodes.getLength() ||
          0 != tokenizerNodes.getLength() ||
          0 != tokenFilterNodes.getLength()) {
        throw new SolrException
        ( SolrException.ErrorCode.SERVER_ERROR,
          "Configuration Error: Analyzer class='" + analyzerName +
          "' can not be combined with nested analysis factories");
      }

      try {
        // No need to be core-aware as Analyzers are not in the core-aware list
        final Class<? extends Analyzer> clazz = loader.findClass(analyzerName, Analyzer.class);

        try {
          // first try to use a ctor with version parameter (needed for many new Analyzers that have no default one anymore)
          final Constructor<? extends Analyzer> cnstr = clazz.getConstructor(Version.class);
          final String matchVersionStr = DOMUtil.getAttr(attrs, LUCENE_MATCH_VERSION_PARAM);
          final Version matchVersion = (matchVersionStr == null) ?
            luceneMatchVersion : Config.parseLuceneVersionString(matchVersionStr);
          if (matchVersion == null) {
            throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
              "Configuration Error: Analyzer '" + clazz.getName() +
              "' needs a 'luceneMatchVersion' parameter");
          }
          return cnstr.newInstance(matchVersion);
        }
        catch (final NoSuchMethodException nsme) {
          // otherwise use default ctor
          return clazz.newInstance();
        }
      }
      catch (final Exception e) {
        logger.error("Cannot load analyzer: "+analyzerName, e);
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                                "Cannot load analyzer: "+analyzerName, e);
      }
    }

    // Load the CharFilters
    // --------------------------------------------------------------------------------
    final ArrayList<CharFilterFactory> charFilters = new ArrayList<CharFilterFactory>();
    final AbstractPluginLoader<CharFilterFactory> charFilterLoader =
      new AbstractPluginLoader<CharFilterFactory>("[analyzerConfig] analyzer/charFilter",
                                                  CharFilterFactory.class, false, false )
    {
      @Override
      protected void init(final CharFilterFactory plugin, final Node node) throws Exception {
        if (plugin != null) {
          final Map<String,String> params = DOMUtil.toMapExcept(node.getAttributes(),"class");

          final String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
          plugin.setLuceneMatchVersion(parseConfiguredVersion(configuredVersion,
            plugin.getClass().getSimpleName(), luceneMatchVersion));

          plugin.init( params );
          charFilters.add( plugin );
        }
      }

      @Override
      protected CharFilterFactory register(final String name, final CharFilterFactory plugin) {
        return null; // used for map registration
      }
    };

    charFilterLoader.load(loader, charFilterNodes);

    // Load the Tokenizer
    // Although an analyzer only allows a single Tokenizer, we load a list to make sure
    // the configuration is ok
    // --------------------------------------------------------------------------------
    final ArrayList<TokenizerFactory> tokenizers = new ArrayList<TokenizerFactory>(1);
    final AbstractPluginLoader<TokenizerFactory> tokenizerLoader =
      new AbstractPluginLoader<TokenizerFactory>("[analyzerConfig] analyzer/tokenizer",
                                                 TokenizerFactory.class, false, false )
    {
      @Override
      protected void init(final TokenizerFactory plugin, final Node node)
      throws Exception {
        if (!tokenizers.isEmpty()) {
          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
              "Multiple tokenizers defined for: "+node);
        }
        final Map<String,String> params = DOMUtil.toMapExcept(node.getAttributes(),"class");

        final String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
        plugin.setLuceneMatchVersion(parseConfiguredVersion(configuredVersion,
          plugin.getClass().getSimpleName(), luceneMatchVersion));

        plugin.init(params);
        tokenizers.add(plugin);
      }

      @Override
      protected TokenizerFactory register(final String name, final TokenizerFactory plugin) {
        return null; // used for map registration
      }
    };

    tokenizerLoader.load(loader, tokenizerNodes);

    // Make sure something was loaded
    if (tokenizers.isEmpty()) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
        "analyzer without class or tokenizer & filter list");
    }


    // Load the Filters
    // --------------------------------------------------------------------------------
    final ArrayList<TokenFilterFactory> filters = new ArrayList<TokenFilterFactory>();
    final AbstractPluginLoader<TokenFilterFactory> filterLoader =
      new AbstractPluginLoader<TokenFilterFactory>("[analyzerConfig] analyzer/filter",
      TokenFilterFactory.class, false, false)
    {
      @Override
      protected void init(final TokenFilterFactory plugin, final Node node) throws Exception {
        if (plugin != null) {
          final Map<String,String> params = DOMUtil.toMapExcept(node.getAttributes(), "class");

          final String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
          plugin.setLuceneMatchVersion(parseConfiguredVersion(configuredVersion,
            plugin.getClass().getSimpleName(), luceneMatchVersion));

          plugin.init(params);
          filters.add(plugin);
        }
      }

      @Override
      protected TokenFilterFactory register(final String name, final TokenFilterFactory plugin)
      throws Exception {
        return null; // used for map registration
      }
    };
    filterLoader.load(loader, tokenFilterNodes);

    return new TokenizerChain(charFilters.toArray(new CharFilterFactory[charFilters.size()]),
        tokenizers.get(0), filters.toArray(new TokenFilterFactory[filters.size()]));
  }

  private static Version parseConfiguredVersion(final String configuredVersion, final String pluginClassName, final Version luceneMatchVersion) {
    final Version version = (configuredVersion != null) ?
      Config.parseLuceneVersionString(configuredVersion) : luceneMatchVersion;

    if (!version.onOrAfter(Version.LUCENE_40)) {
      logger.warn(pluginClassName + " is using deprecated " + version +
        " emulation. You should at some point declare and reindex to at least 4.0, because " +
        "3.x emulation is deprecated and will be removed in 5.0");
    }

    return version;
  }

}
TOP

Related Classes of org.sindice.siren.solr.schema.AnalyzerConfigReader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.