Package com.senseidb.plugin.analyzer

Source Code of com.senseidb.plugin.analyzer.LucenePerFieldAnalyzerFactory

/**
* This software is licensed to you under the Apache License, Version 2.0 (the
* "Apache License").
*
* LinkedIn's contributions are made under the Apache License. If you contribute
* to the Software, the contributions will be deemed to have been made under the
* Apache License, unless you expressly indicate otherwise. Please do not make any
* contributions that would be inconsistent with the Apache License.
*
* You may obtain a copy of the Apache License at http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, this software
* distributed under the Apache License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the Apache
* License for the specific language governing permissions and limitations for the
* software governed under the Apache License.
*
* © 2012 LinkedIn Corp. All Rights Reserved.
*/
package com.senseidb.plugin.analyzer;

import java.util.HashMap;
import java.util.Map;

import org.apache.commons.collections.MapUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;

import com.senseidb.plugin.SenseiPluginFactory;
import com.senseidb.plugin.SenseiPluginRegistry;

/**
* A {@code SenseiPluginFactory} that instantiates a {@link PerFieldAnalyzerWrapper}. It reads
* the field names as a comma-separated list from the <code>fields</code> property. To specify
* the analyzer for each field, you must do it the same way you normally would, but appending
* <code>.field.<i>fieldname</i></code> to the usual prefix. You can set the default analyzer
* with the <code>default</code> property. If this property is missing, the default analyzer
* is set to {@link StandardAnalyzer} with version {@code LUCENE_35}. Example configuration:
*
* <pre>
* sensei.index.analyzer.class=com.senseidb.plugin.analyzer.LucenePerFieldAnalyzerFactory
* sensei.index.analyzer.default.class=com.senseidb.plugin.analyzer.LuceneKeywordAnalyzerFactory
* sensei.index.analyzer.fields=content_tokenized,content_keyword
*
* sensei.index.analyzer.fields.content_tokenized.class=com.senseidb.plugin.analyzer.LucenePatternAnalyzerFactory
* sensei.index.analyzer.fields.content_tokenized.pattern=[ -_./:]
*
* sensei.index.analyzer.fields.content_keyword.class=com.senseidb.plugin.analyzer.LuceneKeywordAnalyzerFactory
* </pre>
*
* @author jgrande
*
*/
public class LucenePerFieldAnalyzerFactory implements SenseiPluginFactory<Analyzer> {

    @Override
    public Analyzer getBean(Map<String, String> initProperties, String fullPrefix, SenseiPluginRegistry pluginRegistry) {
        Analyzer defaultAnalyzer = pluginRegistry.getBeanByFullPrefix(fullPrefix + ".default", Analyzer.class);
        if (defaultAnalyzer == null) {
            defaultAnalyzer = new StandardAnalyzer(Version.LUCENE_35);
        }

        String[] fields = MapUtils.getString(initProperties, "fields", "").split("\\s*,\\s*");
        Map<String, Analyzer> analyzers = new HashMap<String, Analyzer>();
        for (String field: fields) {
            String analyzerPrefix = fullPrefix + ".fields." + field;
            Analyzer analyzer = pluginRegistry.getBeanByFullPrefix(analyzerPrefix, Analyzer.class);
            analyzers.put(field, analyzer);
        }

        return new PerFieldAnalyzerWrapper(defaultAnalyzer, analyzers);
    }

}
TOP

Related Classes of com.senseidb.plugin.analyzer.LucenePerFieldAnalyzerFactory

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.