//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the
//=== United Nations (FAO-UN), United Nations World Food Programme (WFP)
//=== and United Nations Environment Programme (UNEP)
//===
//=== This program is free software; you can redistribute it and/or modify
//=== it under the terms of the GNU General Public License as published by
//=== the Free Software Foundation; either version 2 of the License, or (at
//=== your option) any later version.
//===
//=== This program is distributed in the hope that it will be useful, but
//=== WITHOUT ANY WARRANTY; without even the implied warranty of
//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//=== General Public License for more details.
//===
//=== You should have received a copy of the GNU General Public License
//=== along with this program; if not, write to the Free Software
//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
//===
//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2,
//=== Rome - Italy. email: geonetwork@osgeo.org
//==============================================================================
package org.fao.geonet.kernel.search;
import com.google.common.collect.Lists;
import com.vividsolutions.jts.geom.Envelope;
import com.vividsolutions.jts.geom.Geometry;
import com.vividsolutions.jts.index.SpatialIndex;
import jeeves.server.context.ServiceContext;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.document.DoubleField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.FieldType.NumericType;
import org.apache.lucene.document.FloatField;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.LongField;
import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.BytesRef;
import org.fao.geonet.GeonetContext;
import org.fao.geonet.Util;
import org.fao.geonet.constants.Geonet;
import org.fao.geonet.domain.MetadataType;
import org.fao.geonet.domain.Pair;
import org.fao.geonet.exceptions.JeevesException;
import org.fao.geonet.kernel.DataManager;
import org.fao.geonet.kernel.GeonetworkDataDirectory;
import org.fao.geonet.kernel.SchemaManager;
import org.fao.geonet.kernel.search.LuceneConfig.FacetConfig;
import org.fao.geonet.kernel.search.LuceneConfig.LuceneConfigNumericField;
import org.fao.geonet.kernel.search.function.DocumentBoosting;
import org.fao.geonet.kernel.search.index.GeonetworkMultiReader;
import org.fao.geonet.kernel.search.index.IndexInformation;
import org.fao.geonet.kernel.search.index.LuceneIndexLanguageTracker;
import org.fao.geonet.kernel.search.spatial.ContainsFilter;
import org.fao.geonet.kernel.search.spatial.CrossesFilter;
import org.fao.geonet.kernel.search.spatial.EqualsFilter;
import org.fao.geonet.kernel.search.spatial.IntersectionFilter;
import org.fao.geonet.kernel.search.spatial.IsFullyOutsideOfFilter;
import org.fao.geonet.kernel.search.spatial.OgcGenericFilters;
import org.fao.geonet.kernel.search.spatial.OrSpatialFilter;
import org.fao.geonet.kernel.search.spatial.OverlapsFilter;
import org.fao.geonet.kernel.search.spatial.SpatialFilter;
import org.fao.geonet.kernel.search.spatial.SpatialIndexWriter;
import org.fao.geonet.kernel.search.spatial.TouchesFilter;
import org.fao.geonet.kernel.search.spatial.WithinFilter;
import org.fao.geonet.kernel.setting.SettingInfo;
import org.fao.geonet.utils.IO;
import org.fao.geonet.utils.Log;
import org.fao.geonet.utils.Xml;
import org.geotools.data.DataStore;
import org.geotools.data.DefaultTransaction;
import org.geotools.data.FeatureSource;
import org.geotools.data.Transaction;
import org.geotools.gml3.GMLConfiguration;
import org.geotools.xml.Configuration;
import org.geotools.xml.Parser;
import org.jdom.Content;
import org.jdom.Element;
import org.opengis.feature.simple.SimpleFeature;
import org.opengis.feature.simple.SimpleFeatureType;
import org.opengis.filter.capability.FilterCapabilities;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.context.ApplicationContext;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.Vector;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
/**
* Indexes metadata using Lucene.
*/
public class SearchManager {
// Lucene field carrying a human-readable description of an indexing failure.
private static final String INDEXING_ERROR_MSG = "_indexingErrorMsg";
// Lucene flag field set to "1" for records whose indexing partially failed.
private static final String INDEXING_ERROR_FIELD = "_indexingError";
// MetaSearcher type discriminators understood by newSearcher().
public static final int LUCENE = 1;
public static final int Z3950 = 2;
public static final int UNUSED = 3;
// Locations of search stylesheets and stopword lists, relative to the webapp dir.
private static final String SEARCH_STYLESHEETS_DIR_PATH = "xml/search";
private static final String STOPWORDS_DIR_PATH = "resources/stopwords";
// GeoTools parser configurations for the supported OGC Filter encoding versions.
private static final Configuration FILTER_1_0_0 = new org.geotools.filter.v1_0.OGCConfiguration();
private static final Configuration FILTER_1_1_0 = new org.geotools.filter.v1_1.OGCConfiguration();
private static final Configuration FILTER_2_0_0 = new org.geotools.filter.v2_0.FESConfiguration();
// Suffix appended to names of fields indexed as facets.
public static final String FACET_FIELD_SUFFIX = "_facet";
// Directories resolved in init() from the webapp dir.
private File _stylesheetsDir;
private static File _stopwordsDir;
// Facet configuration for the "hits" taxonomy, loaded from the Lucene config in init().
Map<String, FacetConfig> _summaryConfigValues = null;
/**
 * Used when adding documents to the Lucene index.
 */
private static PerFieldAnalyzerWrapper _analyzer;
/**
 * Used when searching to analyze query terms.
 */
private static PerFieldAnalyzerWrapper _searchAnalyzer;
/**
 * Maps languages to Analyzer that contains GeoNetworkAnalyzers initialized with stopwords for this language.
 */
private static Map<String, Analyzer> analyzerMap = new HashMap<String, Analyzer>();
private static Map<String, Analyzer> searchAnalyzerMap = new HashMap<String, Analyzer>();
// Optional document boosting strategy instantiated from the Lucene config; may remain null.
private static DocumentBoosting _documentBoostClass;
private String _luceneTermsToExclude;
private boolean _logSpatialObject;
// Hardcoded fallback analyzer, used when no analyzer class is declared in the Lucene config.
private static PerFieldAnalyzerWrapper _defaultAnalyzer;
private String _htmlCacheDir;
// Spatial index wrapper; created in init().
private Spatial _spatial;
private boolean _logAsynch;
private LuceneOptimizerManager _luceneOptimizerManager;
@Autowired
private LuceneIndexLanguageTracker _tracker;
@Autowired
private ApplicationContext _applicationContext;
@Autowired
private SettingInfo _settingInfo;
@Autowired
private SchemaManager _schemaManager;
@Autowired
private GeonetworkDataDirectory _geonetworkDataDirectory;
@Autowired
private LuceneConfig _luceneConfig;
// Shared scheduler pool (also used by the optimizer manager — TODO confirm usage).
@Qualifier("timerThreadPool")
@Autowired
private ScheduledThreadPoolExecutor timer;
/** @return the injected {@link SettingInfo} bean. */
public SettingInfo getSettingInfo() {
    return _settingInfo;
}
/**
 * Builds a {@link GeoNetworkAnalyzer} configured with the supplied stopwords
 * and ignore characters.
 *
 * @param stopwords words considered too common to be useful for indexing
 *                  (e.g. in English: 'the', 'it', 'and'); may be null
 * @param ignoreChars characters to be ignored during analysis, e.g. ' or -
 * @return a freshly created GeoNetworkAnalyzer
 */
private static Analyzer createGeoNetworkAnalyzer(final Set<String> stopwords, final char[] ignoreChars) {
    if (Log.isDebugEnabled(Geonet.SEARCH_ENGINE)) {
        Log.debug(Geonet.SEARCH_ENGINE, "Creating GeoNetworkAnalyzer");
    }
    final Analyzer result = new GeoNetworkAnalyzer(stopwords, ignoreChars);
    return result;
}
/**
 * Builds the default (hardcoded) {@link PerFieldAnalyzerWrapper}: a
 * GeoNetworkAnalyzer for most fields, with a few fixed per-field overrides.
 *
 * @param stopwords stopwords handed to the default GeoNetworkAnalyzer; may be null
 * @param ignoreChars characters ignored by the default analyzer
 * @return the configured wrapper
 */
private static PerFieldAnalyzerWrapper createHardCodedPerFieldAnalyzerWrapper(Set<String> stopwords, char[] ignoreChars) {
    final Analyzer defaultAnalyzer = SearchManager.createGeoNetworkAnalyzer(stopwords, ignoreChars);
    // Fields analyzed without stopwords/ignore-chars, or as a single keyword token.
    final Map<String, Analyzer> perFieldOverrides = new HashMap<String, Analyzer>();
    perFieldOverrides.put(LuceneIndexField.UUID, new GeoNetworkAnalyzer());
    perFieldOverrides.put(LuceneIndexField.PARENTUUID, new GeoNetworkAnalyzer());
    perFieldOverrides.put(LuceneIndexField.OPERATESON, new GeoNetworkAnalyzer());
    perFieldOverrides.put(LuceneIndexField.SUBJECT, new KeywordAnalyzer());
    return new PerFieldAnalyzerWrapper(defaultAnalyzer, perFieldOverrides);
}
/**
 *
 * @return The current analyzer used by the search manager (indexing-time analyzer).
 */
public static PerFieldAnalyzerWrapper getAnalyzer() {
    return _analyzer;
}
/** @return the analyzer used to analyze query terms at search time. */
public static PerFieldAnalyzerWrapper getSearchAnalyzer() {
    return _searchAnalyzer;
}
/**
 * Retrieves the per-field analyzer for a language, for either searching or
 * indexing. Falls back to the default analyzer when no language-specific one
 * was registered.
 *
 * @param language language code of the wanted analyzer
 * @param forSearching true to return the search-time analyzer, false for the
 *                     indexing-time analyzer
 * @return the language-specific analyzer, or the matching default analyzer
 */
public static PerFieldAnalyzerWrapper getAnalyzer(String language, boolean forSearching) {
    if (Log.isDebugEnabled(Geonet.LUCENE)) {
        Log.debug(Geonet.LUCENE, "Get analyzer for searching: " + forSearching + " and language: " + language);
    }
    final Map<String, Analyzer> perLanguage = forSearching ? searchAnalyzerMap : analyzerMap;
    final PerFieldAnalyzerWrapper languageSpecific = (PerFieldAnalyzerWrapper) perLanguage.get(language);
    if (languageSpecific == null) {
        if (Log.isDebugEnabled(Geonet.LUCENE)) {
            Log.debug(Geonet.LUCENE, "Returning default analyzer.");
        }
        return forSearching ? _searchAnalyzer : _analyzer;
    }
    return languageSpecific;
}
/**
 * Initializes the hardcoded default {@link PerFieldAnalyzerWrapper} and, when the
 * stopwords directory is usable, registers one hardcoded per-field analyzer per
 * stopwords file found there (keyed by the language taken from the file name).
 *
 * @param ignoreChars characters to be ignored by the created analyzers
 */
private static void initHardCodedAnalyzers(char[] ignoreChars) {
    if (Log.isDebugEnabled(Geonet.SEARCH_ENGINE)) {
        Log.debug(Geonet.SEARCH_ENGINE, "initializing hardcoded analyzers");
    }
    // no default analyzer instantiated: create one
    if (_defaultAnalyzer == null) {
        // create hardcoded default PerFieldAnalyzerWrapper w/o stopwords
        _defaultAnalyzer = SearchManager.createHardCodedPerFieldAnalyzerWrapper(null, ignoreChars);
    }
    if (!_stopwordsDir.exists() || !_stopwordsDir.isDirectory()) {
        Log.warning(Geonet.SEARCH_ENGINE, "Invalid stopwords directory " + _stopwordsDir.getAbsolutePath() +
                ", not using any stopwords.");
        return;
    }
    if (Log.isDebugEnabled(Geonet.LUCENE)) {
        Log.debug(Geonet.LUCENE, "loading stopwords");
    }
    File[] files = _stopwordsDir.listFiles();
    if (files == null) {
        files = new File[0];
    }
    for (File stopwordsFile : files) {
        String fileName = stopwordsFile.getName();
        int extensionIndex = fileName.indexOf('.');
        // Guard: a file name without a '.' previously crashed with
        // StringIndexOutOfBoundsException (substring(0, -1)).
        String language = extensionIndex < 0 ? fileName : fileName.substring(0, extensionIndex);
        // NOTE(review): this expects 2-letter (ISO 639-1) codes while createAnalyzer()
        // expects 3-letter (ISO 639-2) file names — confirm which convention applies.
        if (language.length() != 2) {
            Log.info(Geonet.LUCENE, "invalid iso 639-1 code for language: " + language);
        }
        // look up stopwords for that language
        Set<String> stopwordsForLanguage = StopwordFileParser.parse(stopwordsFile.getAbsolutePath());
        if (stopwordsForLanguage != null) {
            if (Log.isDebugEnabled(Geonet.LUCENE)) {
                Log.debug(Geonet.LUCENE, "loaded # " + stopwordsForLanguage.size() + " stopwords for language " + language);
            }
            Analyzer languageAnalyzer = SearchManager.createHardCodedPerFieldAnalyzerWrapper(stopwordsForLanguage, ignoreChars);
            analyzerMap.put(language, languageAnalyzer);
        } else if (Log.isDebugEnabled(Geonet.LUCENE)) {
            Log.debug(Geonet.LUCENE, "failed to load any stopwords for language " + language);
        }
    }
}
/**
 * Creates an analyzer based on its definition in the Lucene config.
 *
 * @param analyzerClassName Class name of analyzer to create
 * @param field The Lucene field this analyzer is created for (null for the default analyzer)
 * @param stopwords stop words used if the analyzer class is org.fao.geonet.kernel.search.GeoNetworkAnalyzer
 * @return the configured analyzer; falls back to a GeoNetworkAnalyzer when creation fails
 */
private Analyzer createAnalyzerFromLuceneConfig(final String analyzerClassName, final String field, final Set<String> stopwords) {
    final char[] ignoreChars = _settingInfo.getAnalyzerIgnoreChars();
    Analyzer analyzer = null;
    try {
        if (Log.isDebugEnabled(Geonet.SEARCH_ENGINE)) {
            Log.debug(Geonet.SEARCH_ENGINE, "Creating analyzer defined in Lucene config:" + analyzerClassName);
        }
        // GNA analyzer
        if (analyzerClassName.equals("org.fao.geonet.kernel.search.GeoNetworkAnalyzer")) {
            analyzer = SearchManager.createGeoNetworkAnalyzer(stopwords, ignoreChars);
        } else {
            // non-GNA analyzer: instantiate reflectively, first with the configured
            // constructor parameters, then falling back to the no-arg constructor.
            try {
                @SuppressWarnings("unchecked")
                Class<? extends Analyzer> analyzerClass = (Class<? extends Analyzer>) Class.forName(analyzerClassName);
                Class<?>[] clTypesArray = _luceneConfig.getAnalyzerParameterClass((field == null ? "" : field) + analyzerClassName);
                Object[] inParamsArray = _luceneConfig.getAnalyzerParameter((field == null ? "" : field) + analyzerClassName);
                try {
                    if (Log.isDebugEnabled(Geonet.SEARCH_ENGINE)) {
                        Log.debug(Geonet.SEARCH_ENGINE, " Creating analyzer with parameter");
                    }
                    Constructor<? extends Analyzer> c = analyzerClass.getConstructor(clTypesArray);
                    analyzer = c.newInstance(inParamsArray);
                } catch (Exception x) {
                    // Log the full stack trace instead of dumping it to stderr.
                    Log.error(Geonet.SEARCH_ENGINE, " Failed to create analyzer with parameter: " + x.getMessage(), x);
                    // Try using a default constructor without parameter
                    Log.warning(Geonet.SEARCH_ENGINE, " Now trying without parameter");
                    analyzer = analyzerClass.newInstance();
                }
            } catch (Exception y) {
                Log.error(Geonet.SEARCH_ENGINE, "Failed to create analyzer as specified in lucene config, default analyzer will " +
                        "be used for field " + field + ". Exception message is: " + y.getMessage(), y);
                // abandon and continue with next field defined in lucene config
            }
        }
    } catch (Exception z) {
        Log.error(Geonet.SEARCH_ENGINE, " Error on analyzer initialization: " + z.getMessage() + ". Check your Lucene " +
                "configuration. Hardcoded default analyzer will be used for field " + field, z);
    } finally {
        // If creation failed for any reason above, default to GeoNetworkAnalyzer.
        if (analyzer == null) {
            Log.warning(Geonet.SEARCH_ENGINE, "Creating analyzer has failed, defaulting to GeoNetworkAnalyzer");
            analyzer = SearchManager.createGeoNetworkAnalyzer(stopwords, ignoreChars);
        }
    }
    return analyzer;
}
/**
 * Creates analyzers. An analyzer is created for each language that has a stopwords file, and a stopword-less
 * default analyzer is also created.
 *
 * If no analyzer class is specified in Lucene configuration, a default hardcoded PerFieldAnalyzer is created
 * (@see #initHardCodedAnalyzers(char[])).
 *
 * If an error occurs instantiating an analyzer, GeoNetworkAnalyzer is used.
 */
public void createAnalyzer() {
    String defaultAnalyzerClass = _luceneConfig.getDefaultAnalyzerClass();
    if (Log.isDebugEnabled(Geonet.SEARCH_ENGINE)) {
        Log.debug(Geonet.SEARCH_ENGINE, "createAnalyzer start");
        Log.debug(Geonet.SEARCH_ENGINE, "defaultAnalyzer defined in Lucene config: " + defaultAnalyzerClass);
    }
    char[] ignoreChars = _settingInfo.getAnalyzerIgnoreChars();
    // there is no default analyzer defined in lucene config
    if (defaultAnalyzerClass == null) {
        // create default (hardcoded) analyzer
        SearchManager.initHardCodedAnalyzers(ignoreChars);
    } else {
        // there is an analyzer defined in lucene config
        if (!_stopwordsDir.exists() || !_stopwordsDir.isDirectory()) {
            Log.warning(Geonet.SEARCH_ENGINE, "Invalid stopwords directory " + _stopwordsDir.getAbsolutePath() +
                    ", not using any stopwords.");
        } else {
            if (Log.isDebugEnabled(Geonet.LUCENE)) {
                Log.debug(Geonet.LUCENE, "Loading stopwords and creating per field anlayzer ...");
            }
            // One per field analyzer is created for each stopword list available using GNA as default analyzer
            // Configuration can't define different analyzer per language.
            // TODO : http://trac.osgeo.org/geonetwork/ticket/900
            File[] files = _stopwordsDir.listFiles();
            if (files == null) {
                files = new File[0];
            }
            for (File stopwordsFile : files) {
                String fileName = stopwordsFile.getName();
                int extensionIndex = fileName.indexOf('.');
                // Guard: a file name without a '.' previously crashed with
                // StringIndexOutOfBoundsException (substring(0, -1)).
                String language = extensionIndex < 0 ? fileName : fileName.substring(0, extensionIndex);
                // TODO check for valid ISO 639-2 codes could be better than this
                if (language.length() != 3) {
                    Log.warning(Geonet.LUCENE, "Stopwords file with incorrect ISO 639-2 language as filename: " + language);
                }
                // look up stopwords for that language
                Set<String> stopwordsForLanguage = StopwordFileParser.parse(stopwordsFile.getAbsolutePath());
                if (stopwordsForLanguage != null) {
                    if (Log.isDebugEnabled(Geonet.LUCENE)) {
                        Log.debug(Geonet.LUCENE, "Loaded # " + stopwordsForLanguage.size() + " stopwords for language " + language);
                    }
                    // Configure per field analyzer and register them to language map of pfa
                    // ... for indexing
                    configurePerFieldAnalyzerWrapper(defaultAnalyzerClass, _luceneConfig.getFieldSpecificAnalyzers(),
                            analyzerMap, language, stopwordsForLanguage);
                    // ... for searching
                    configurePerFieldAnalyzerWrapper(defaultAnalyzerClass, _luceneConfig.getFieldSpecificSearchAnalyzers(),
                            searchAnalyzerMap, language, stopwordsForLanguage);
                } else {
                    if (Log.isDebugEnabled(Geonet.LUCENE)) {
                        Log.debug(Geonet.LUCENE, "Failed to load any stopwords for language " + language);
                    }
                }
            }
        }
        // Configure default per field analyzer
        _analyzer = configurePerFieldAnalyzerWrapper(defaultAnalyzerClass, _luceneConfig.getFieldSpecificAnalyzers(),
                null, null, null);
        _searchAnalyzer = configurePerFieldAnalyzerWrapper(defaultAnalyzerClass, _luceneConfig.getFieldSpecificSearchAnalyzers(),
                null, null, null);
    }
}
/**
 * Creates and configures a per-field analyzer wrapper and, optionally,
 * registers it in the given reference map.
 *
 * @param defaultAnalyzerClass class name of the default analyzer to use
 * @param fieldAnalyzers per-field analyzer class overrides from the Lucene config
 * @param referenceMap map in which to register the wrapper; may be null
 * @param referenceKey key under which to register the wrapper (typically a language)
 * @param stopwordsForLanguage stopwords to use (only relevant for GNA)
 *
 * @return the configured per field analyzer wrapper
 */
private PerFieldAnalyzerWrapper configurePerFieldAnalyzerWrapper(
        String defaultAnalyzerClass, Map<String, String> fieldAnalyzers,
        Map<String, Analyzer> referenceMap, String referenceKey,
        Set<String> stopwordsForLanguage) {
    if (Log.isDebugEnabled(Geonet.SEARCH_ENGINE)) {
        Log.debug(Geonet.SEARCH_ENGINE, " Default analyzer class: " + defaultAnalyzerClass);
    }
    // Default analyzer as declared in the Lucene configuration.
    final Analyzer baseAnalyzer = createAnalyzerFromLuceneConfig(defaultAnalyzerClass, null, stopwordsForLanguage);
    // Per-field exceptions to the default analyzer, as declared in the Lucene config.
    final Map<String, Analyzer> overrides = new HashMap<String, Analyzer>();
    for (Entry<String, String> override : fieldAnalyzers.entrySet()) {
        final String fieldName = override.getKey();
        final String analyzerClassName = override.getValue();
        if (Log.isDebugEnabled(Geonet.SEARCH_ENGINE)) {
            Log.debug(Geonet.SEARCH_ENGINE, " Add analyzer for field: " + fieldName + "=" + analyzerClassName);
        }
        overrides.put(fieldName, createAnalyzerFromLuceneConfig(analyzerClassName, fieldName, stopwordsForLanguage));
    }
    final PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(baseAnalyzer, overrides);
    // Register to a reference map if needed
    if (referenceMap != null) {
        referenceMap.put(referenceKey, wrapper);
    }
    return wrapper;
}
/**
 * Initializes the search manager: loads the "hits" facet configuration, the
 * analyzers, the document boost, the spatial index and the Lucene/Z39.50
 * subsystems, and starts the index optimizer manager.
 *
 * @param logAsynch whether spatial objects are logged asynchronously
 * @param logSpatialObject whether spatial objects are logged
 * @param luceneTermsToExclude terms excluded from Lucene processing
 * @param maxWritesInTransaction maximum writes per spatial index transaction
 * @throws Exception if the stylesheets directory is missing or a subsystem fails to initialize
 */
public void init(boolean logAsynch, boolean logSpatialObject, String luceneTermsToExclude,
                 int maxWritesInTransaction) throws Exception {
    _summaryConfigValues = _luceneConfig.getTaxonomy().get("hits");
    String appPath = _geonetworkDataDirectory.getWebappDir();
    _stylesheetsDir = new File(appPath, SEARCH_STYLESHEETS_DIR_PATH);
    // Use the (parent, child) File constructor, consistent with _stylesheetsDir above:
    // the previous raw string concatenation produced a wrong path whenever appPath
    // did not end with a file separator.
    _stopwordsDir = new File(appPath, STOPWORDS_DIR_PATH);
    createAnalyzer();
    createDocumentBoost();
    if (!_stylesheetsDir.isDirectory()) {
        throw new Exception("directory " + _stylesheetsDir + " not found");
    }
    File htmlCacheDirTest = _geonetworkDataDirectory.getHtmlCacheDir();
    IO.mkdirs(htmlCacheDirTest, "Html cache directory");
    _htmlCacheDir = htmlCacheDirTest.getAbsolutePath();
    _spatial = new Spatial(_applicationContext.getBean(DataStore.class), maxWritesInTransaction);
    _logAsynch = logAsynch;
    _logSpatialObject = logSpatialObject;
    _luceneTermsToExclude = luceneTermsToExclude;
    initLucene();
    initZ3950();
    _luceneOptimizerManager = new LuceneOptimizerManager(this, _settingInfo);
}
/**
 * Instantiates the optional {@link DocumentBoosting} strategy declared in the
 * Lucene configuration: first with the configured constructor parameters, then
 * falling back to the no-arg constructor. Leaves {@code _documentBoostClass}
 * null when no class is configured or creation fails.
 */
private void createDocumentBoost() {
    String className = _luceneConfig.getDocumentBoostClass();
    if (className == null) {
        return;
    }
    try {
        @SuppressWarnings(value = "unchecked")
        Class<? extends DocumentBoosting> clazz = (Class<? extends DocumentBoosting>) Class.forName(className);
        Class<?>[] clTypesArray = _luceneConfig.getDocumentBoostParameterClass();
        Object[] inParamsArray = _luceneConfig.getDocumentBoostParameter();
        try {
            if (Log.isDebugEnabled(Geonet.SEARCH_ENGINE)) {
                Log.debug(Geonet.SEARCH_ENGINE, " Creating document boost object with parameter");
            }
            Constructor<? extends DocumentBoosting> c = clazz.getConstructor(clTypesArray);
            _documentBoostClass = c.newInstance(inParamsArray);
        } catch (Exception x) {
            // Log with the full stack trace instead of printing to stderr.
            Log.error(Geonet.SEARCH_ENGINE, " Failed to create document boost object with parameter: " + x.getMessage(), x);
            // Try using a default constructor without parameter
            Log.warning(Geonet.SEARCH_ENGINE, " Now trying without parameter");
            _documentBoostClass = clazz.newInstance();
        }
    } catch (Exception e) {
        // Previously only printStackTrace(): record the failure in the application log.
        Log.error(Geonet.SEARCH_ENGINE, "Failed to create document boost class " + className + ": " + e.getMessage(), e);
    }
}
/**
 * Reload Lucene configuration: update analyzer.
 */
public void reloadLuceneConfiguration () {
    createAnalyzer();       // rebuild indexing/search analyzers from the current config
    createDocumentBoost();  // rebuild the optional document boosting strategy
}
/**
 * Shuts down the search subsystems: Z39.50 client, spatial index, optimizer
 * scheduler and, last, the Lucene index tracker.
 *
 * @throws Exception if closing a subsystem fails
 */
public void end() throws Exception {
    endZ3950();
    _spatial.end();
    _luceneOptimizerManager.cancel();
    // Allow up to one minute for the tracker to close (second arg semantics
    // defined by LuceneIndexLanguageTracker — presumably "force close"; confirm).
    _tracker.close(TimeUnit.MINUTES.toMillis(1), true);
}
/**
 * Cancels the scheduled Lucene index optimizer.
 *
 * @throws Exception if cancellation fails
 */
public synchronized void disableOptimizer() throws Exception {
    Log.info(Geonet.INDEX_ENGINE, "Scheduling thread that optimizes lucene index is disabled");
    _luceneOptimizerManager.cancel();
}
/**
 * Reschedules the Lucene index optimizer.
 *
 * @param optimizerBeginAt time of the first optimizer run
 * @param optimizerInterval interval between runs (units defined by LuceneOptimizerManager)
 * @throws Exception if rescheduling fails
 */
public synchronized void rescheduleOptimizer(Calendar optimizerBeginAt, int optimizerInterval) throws Exception {
    _luceneOptimizerManager.reschedule(optimizerBeginAt, optimizerInterval);
}
/**
 * Creates a new MetaSearcher of the requested type.
 *
 * @param type one of {@link #LUCENE}, {@link #Z3950} or {@link #UNUSED}
 * @param stylesheetName name of the stylesheet used by the searcher
 * @return a searcher of the requested type
 * @throws Exception when the type is not one of the known constants
 */
public MetaSearcher newSearcher(int type, String stylesheetName) throws Exception {
    if (type == LUCENE) {
        return new LuceneSearcher(this, stylesheetName, _luceneConfig);
    }
    if (type == Z3950) {
        return new Z3950Searcher(this, _schemaManager, stylesheetName);
    }
    if (type == UNUSED) {
        return new UnusedSearcher();
    }
    throw new Exception("unknown MetaSearcher type: " + type);
}
/**
 * Lucene init/end methods. Creates the Lucene index directory.
 * @throws Exception
 */
private void initLucene() throws Exception {
    setupIndex(false); // flag semantics defined in setupIndex (not visible here) — presumably "rebuild"; confirm
}
// Z39.50 init/end methods
/**
 * Initializes the Z3950 client searcher. Intentionally a no-op in this implementation.
 */
private void initZ3950() {}
/**
 * Deinitializes the Z3950 client searcher. Intentionally a no-op in this implementation.
 */
private void endZ3950() {}
/** @return absolute path of the HTML cache directory (resolved in init). */
public String getHtmlCacheDir() {
    return _htmlCacheDir;
}
/** @return whether spatial objects are logged asynchronously (set in init). */
public boolean getLogAsynch() {
    return _logAsynch;
}
/** @return whether spatial objects are logged (set in init). */
public boolean getLogSpatialObject() {
    return _logSpatialObject;
}
/** @return terms to exclude from Lucene processing (set in init). */
public String getLuceneTermsToExclude() {
    return _luceneTermsToExclude;
}
// indexing methods
/**
 * Indexes a metadata record: updates the spatial index first (recording any
 * failure as error fields in the Lucene document), then replaces the record's
 * Lucene documents.
 *
 * @param schemaDir schema directory holding the index stylesheets
 * @param metadata the record to index
 * @param id internal metadata id
 * @param moreFields extra Field elements added to every generated document
 * @param metadataType type of the metadata record
 * @param root name of the metadata root element
 * @param forceRefreshReaders if true then block all searches until they can obtain a up-to-date reader
 * @throws Exception on indexing failure
 */
public void index(String schemaDir, Element metadata, String id, List<Element> moreFields,
                  MetadataType metadataType, String root, boolean forceRefreshReaders)
        throws Exception {
    // Update spatial index first and if error occurs, record it to Lucene index
    indexGeometry(schemaDir, metadata, id, moreFields);
    // Update Lucene index
    List<IndexInformation> documents = buildIndexDocument(schemaDir, metadata, id, moreFields, metadataType, root);
    _tracker.deleteDocuments(new Term("_id", id));
    for (IndexInformation information : documents) {
        _tracker.addDocument(information);
    }
    if (forceRefreshReaders) {
        _tracker.maybeRefreshBlocking();
    }
}
/**
 * Updates the spatial index for one record. Any failure is logged and also
 * recorded as indexing-error fields appended to {@code moreFields}, so the
 * problem becomes visible in the Lucene index.
 *
 * @param schemaDir schema directory
 * @param metadata the record whose geometry is indexed
 * @param id internal metadata id
 * @param moreFields list that receives error Field elements on failure
 * @throws Exception if the spatial writer cannot be obtained
 */
private void indexGeometry(String schemaDir, Element metadata, String id,
                           List<Element> moreFields) throws Exception {
    try {
        _spatial.writer().delete(id);
        _spatial.writer().index(schemaDir, id, metadata);
    } catch (Exception e) {
        Log.error(Geonet.INDEX_ENGINE, "Failed to properly index geometry of metadata " + id + ". Error: " + e.getMessage(), e);
        moreFields.add(SearchManager.makeField(INDEXING_ERROR_FIELD, "1", true, true));
        moreFields.add(SearchManager.makeField(INDEXING_ERROR_MSG, "GNIDX-GEOWRITE||" + e.getMessage(), true, false));
    }
    // Errors collected by the writer itself are reported the same way.
    final Map<String, String> errors = _spatial.writer().getErrorMessage();
    for (Entry<String, String> error : errors.entrySet()) {
        moreFields.add(SearchManager.makeField(INDEXING_ERROR_FIELD, "1", true, true));
        moreFields.add(SearchManager.makeField(INDEXING_ERROR_MSG, "GNIDX-GEO|" + error.getKey() + "|" + error.getValue(), true, false));
    }
}
/**
 * Removes matching documents from both the Lucene index and the spatial index.
 *
 * @param fld Lucene field to match
 * @param txt value identifying the documents to remove
 * @throws Exception on deletion failure
 */
public void deleteGroup(String fld, String txt) throws Exception {
    // possibly remove old document
    if (Log.isDebugEnabled(Geonet.INDEX_ENGINE)) {
        Log.debug(Geonet.INDEX_ENGINE, "Deleting document ");
    }
    _tracker.deleteDocuments(new Term(fld, txt));
    _spatial.writer().delete(txt);
}
/**
 * Transforms a metadata record into its Lucene index document(s) — one per
 * language document produced by the schema's index-fields stylesheets — adding
 * the _id field and all extra fields to each document.
 *
 * @param schemaDir schema directory containing the index stylesheets
 * @param metadata the record to index
 * @param id internal metadata id, added to every document as the _id field
 * @param moreFields extra pre-built Field elements appended to every document
 * @param metadataType metadata type (not used in this method body)
 * @param root name of the metadata root element, used to select the stylesheet
 * @return one IndexInformation per generated document
 * @throws Exception if the transformation fails
 */
private List<IndexInformation> buildIndexDocument(String schemaDir, Element metadata, String id,
                                                  List<Element> moreFields, MetadataType metadataType,
                                                  String root) throws Exception
{
    if (Log.isDebugEnabled(Geonet.INDEX_ENGINE)) {
        Log.debug(Geonet.INDEX_ENGINE, "Metadata to index:\n" + Xml.getString(metadata));
    }
    File defaultLangStyleSheet = getIndexFieldsXsl(schemaDir, root, "");
    File otherLocalesStyleSheet = getIndexFieldsXsl(schemaDir, root, "language-");
    Element xmlDoc = getIndexFields(metadata, defaultLangStyleSheet, otherLocalesStyleSheet);
    if (Log.isDebugEnabled(Geonet.INDEX_ENGINE)) {
        Log.debug(Geonet.INDEX_ENGINE, "Indexing fields:\n" + Xml.getString(xmlDoc));
    }
    @SuppressWarnings(value = "unchecked")
    List<Element> documentElements = xmlDoc.getContent();
    // Multilingual sort fields are computed once and shared by all language documents.
    Collection<Field> multilingualSortFields = findMultilingualSortElements(documentElements);
    List<IndexInformation> documents = Lists.newArrayList();
    for (Element doc : documentElements) {
        // add _id field
        SearchManager.addField(doc, LuceneIndexField.ID, id, true, true);
        // add more fields
        for (Element moreField : moreFields) {
            doc.addContent((Content) moreField.clone());
        }
        String locale = getLocaleFromIndexDoc(doc);
        documents.add(newDocument(locale, doc, multilingualSortFields));
    }
    if (Log.isDebugEnabled(Geonet.INDEX_ENGINE))
        Log.debug(Geonet.INDEX_ENGINE, "Lucene document:\n" + Xml.getString(xmlDoc));
    return documents;
}
/**
 * Resolves the index-fields stylesheet for a schema, trying in order:
 * a root-element-specific sheet, the schema's default sheet, and finally the
 * legacy location at the schema root.
 *
 * @param schemaDir schema directory
 * @param root name of the metadata root element (possibly namespace-prefixed); may be null
 * @param indexName stylesheet name prefix ("" or "language-")
 * @return the first existing candidate (the legacy path is returned even if absent)
 */
private File getIndexFieldsXsl(String schemaDir, String root, String indexName) {
    String rootName = (root == null) ? "" : root.toLowerCase();
    // Strip a namespace prefix such as "gmd:" if present.
    final int prefixEnd = rootName.indexOf(':');
    if (prefixEnd >= 0) {
        rootName = rootName.substring(prefixEnd + 1);
    }
    final String basicName = "index-fields";
    final File stylesheetDir = new File(schemaDir, basicName);
    File candidate = new File(stylesheetDir, indexName + rootName + ".xsl");
    if (!candidate.exists()) {
        candidate = new File(stylesheetDir, indexName + "default.xsl");
    }
    if (!candidate.exists()) {
        // backward compatibility
        candidate = new File(schemaDir, indexName + basicName + ".xsl");
    }
    return candidate;
}
/**
 * Builds the Lucene sort fields for every configured multilingual sort field
 * found in the language documents, one Field per (name, locale) combination
 * (first occurrence wins).
 *
 * @param documentElements the per-language index documents
 * @return the deduplicated sort fields
 */
private Collection<Field> findMultilingualSortElements(List<Element> documentElements) {
    Map<String, Field> multilingualSortFields = new HashMap<String, Field>();
    // Loop-invariant hoist: the configured set does not depend on the document,
    // so fetch it once instead of once per language document.
    Set<String> configuredMultilingualSortFields = _luceneConfig.getMultilingualSortFields();
    for (Element doc : documentElements) {
        String locale = getLocaleFromIndexDoc(doc);
        List<?> fields = doc.getChildren("Field");
        for (Object object : fields) {
            Element field = (Element) object;
            String fieldName = field.getAttributeValue("name");
            if (configuredMultilingualSortFields.contains(fieldName)) {
                String nameWithLocale = LuceneConfig.multilingualSortFieldName(fieldName, locale);
                if (!multilingualSortFields.containsKey(nameWithLocale)) {
                    String fieldValue = field.getAttributeValue("string");
                    // Sort fields: indexed untokenized, stored, no norms, docs-only postings.
                    FieldType fieldType = new FieldType();
                    fieldType.setIndexed(true);
                    fieldType.setIndexOptions(IndexOptions.DOCS_ONLY);
                    fieldType.setOmitNorms(true);
                    fieldType.setTokenized(false);
                    fieldType.setStored(true);
                    multilingualSortFields.put(nameWithLocale, new Field(nameWithLocale, fieldValue, fieldType));
                }
            }
        }
    }
    return multilingualSortFields.values();
}
/**
 * Reads the "locale" attribute of an index document, falling back to the
 * default language when it is missing or blank.
 *
 * @param doc an index document element
 * @return the document locale, never null
 */
private String getLocaleFromIndexDoc(Element doc) {
    final String locale = doc.getAttributeValue("locale");
    if (locale != null && !locale.trim().isEmpty()) {
        return locale;
    }
    return Geonet.DEFAULT_LANGUAGE;
}
/**
 * Creates a new XML field for the Lucene index and add it to the document.
 *
 * @param xmlDoc document element the new Field element is appended to
 * @param name field name
 * @param value field value (null becomes the empty string)
 * @param store whether the field value is stored
 * @param index whether the field is indexed
 */
private static void addField(Element xmlDoc, String name, String value, boolean store, boolean index) {
    Element field = makeField(name, value, store, index);
    xmlDoc.addContent(field);
}
/**
 * Creates a new XML "Field" element describing one Lucene field.
 *
 * @param name field name
 * @param value field value; null is replaced by the empty string
 * @param store whether the field value is stored
 * @param index whether the field is indexed
 * @return the new Field element
 */
public static Element makeField(String name, String value, boolean store, boolean index) {
    final String safeValue = (value == null) ? "" : value;
    final Element field = new Element("Field");
    field.setAttribute(LuceneFieldAttribute.NAME.toString(), name);
    field.setAttribute(LuceneFieldAttribute.STRING.toString(), safeValue);
    field.setAttribute(LuceneFieldAttribute.STORE.toString(), String.valueOf(store));
    field.setAttribute(LuceneFieldAttribute.INDEX.toString(), String.valueOf(index));
    return field;
}
/** Attribute names used on the XML "Field" elements built for the Lucene index. */
public enum LuceneFieldAttribute {
    NAME("name"),
    STRING("string"),
    STORE("store"),
    INDEX("index");

    // Lower-case attribute name as written into the XML.
    private final String attributeName;

    LuceneFieldAttribute(String attributeName) {
        this.attributeName = attributeName;
    }

    @Override
    public String toString() {
        return attributeName;
    }
}
/**
 * Recursively collects the trimmed text of an element and all its descendants
 * into the given buffer, separating fragments with a single space.
 *
 * @param metadata element whose text is collected
 * @param sb buffer receiving the concatenated text
 */
private void allText(Element metadata, StringBuilder sb) {
    final String text = metadata.getText().trim();
    if (!text.isEmpty()) {
        if (sb.length() > 0) {
            sb.append(" ");
        }
        sb.append(text);
    }
    @SuppressWarnings("unchecked")
    List<Element> children = metadata.getChildren();
    for (Element child : children) {
        allText(child, sb);
    }
}
/**
 * Deletes one document from both the Lucene index and the spatial index.
 *
 * @param fld Lucene field to match
 * @param txt value identifying the document
 * @throws Exception on deletion failure
 */
public void delete(String fld, String txt) throws Exception {
    // possibly remove old document
    _tracker.deleteDocuments(new Term(fld, txt));
    _spatial.writer().delete(txt);
}
/**
 * Deletes a list of documents from both the Lucene index and the spatial index.
 *
 * @param fld Lucene field to match
 * @param txts values identifying the documents
 * @throws Exception on deletion failure
 */
public void delete(String fld, List<String> txts) throws Exception {
    // possibly remove old documents
    for (String value : txts) {
        _tracker.deleteDocuments(new Term(fld, value));
    }
    _spatial.writer().delete(txts);
}
/**
 * Returns the internal ids of all indexed documents whose "_hasxlinks" field
 * equals "1".
 *
 * @return set of metadata ids, in index order
 * @throws Exception if the index cannot be read
 */
public Set<Integer> getDocsWithXLinks() throws Exception {
    IndexAndTaxonomy indexAndTaxonomy = getNewIndexReader(null);
    try {
        GeonetworkMultiReader reader = indexAndTaxonomy.indexReader;
        Set<Integer> docs = new LinkedHashSet<Integer>();
        for (int i = 0; i < reader.maxDoc(); i++) {
            // Commented this out for lucene 4.0 and NRT indexing. It shouldn't be needed I would guess but leave it here
            // for a bit longer: Commented out since: Dec 10 2012
            // FIXME: strange lucene hack: sometimes it tries to load a deleted document
            // if (reader.isDeleted(i)) continue;
            DocumentStoredFieldVisitor idXLinkSelector = new DocumentStoredFieldVisitor("_id", "_hasxlinks");
            reader.document(i, idXLinkSelector);
            Document doc = idXLinkSelector.getDocument();
            String id = doc.get("_id");
            String hasxlinks = doc.get("_hasxlinks");
            if (Log.isDebugEnabled(Geonet.INDEX_ENGINE)) {
                Log.debug(Geonet.INDEX_ENGINE, "Got id " + id + " : '" + hasxlinks + "'");
            }
            if (id == null) {
                Log.error(Geonet.INDEX_ENGINE, "Document with no _id field skipped! Document is " + doc);
                continue;
            }
            // Guard against documents missing the _hasxlinks field: doc.get()
            // returns null then, and the previous code threw a NullPointerException.
            if (hasxlinks != null && hasxlinks.trim().equals("1")) {
                docs.add(Integer.valueOf(id));
            }
        }
        return docs;
    } finally {
        releaseIndexReader(indexAndTaxonomy);
    }
}
/**
 * Returns, for every document in the index, its metadata id mapped to its
 * stored change date.
 *
 * @return a map from the stored "_id" field to the stored "_changeDate" field
 * @throws Exception if the index reader cannot be acquired or read
 */
public Map<String,String> getDocsChangeDate() throws Exception {
    IndexAndTaxonomy indexAndTaxonomy = getNewIndexReader(null);
    try {
        GeonetworkMultiReader reader = indexAndTaxonomy.indexReader;
        // Presize the map so it never rehashes (default load factor is 0.75).
        int capacity = (int) (reader.maxDoc() / 0.75) + 1;
        Map<String, String> changeDates = new HashMap<String, String>(capacity);
        int maxDoc = reader.maxDoc();
        for (int docId = 0; docId < maxDoc; docId++) {
            // Commented this out for lucene 4.0 and NRT indexing. It shouldn't be needed I would guess but leave it here
            // for a bit longer: Commented out since: Dec 10 2012
            // FIXME: strange lucene hack: sometimes it tries to load a deleted document
            // if (reader.isDeleted(docId)) continue;

            // Load only the two stored fields we care about.
            DocumentStoredFieldVisitor idAndChangeDateVisitor = new DocumentStoredFieldVisitor("_id", "_changeDate");
            reader.document(docId, idAndChangeDateVisitor);
            Document doc = idAndChangeDateVisitor.getDocument();
            String id = doc.get("_id");
            if (id == null) {
                Log.error(Geonet.INDEX_ENGINE, "Document with no _id field skipped! Document is " + doc);
                continue;
            }
            changeDates.put(id, doc.get("_changeDate"));
        }
        return changeDates;
    }
    finally {
        releaseIndexReader(indexAndTaxonomy);
    }
}
/**
 * Browses the index and returns all values for the Lucene field.
 *
 * @param fld The Lucene field name
 * @return The list of values for the field
 * @throws Exception if the index reader cannot be acquired or read
 */
public Vector<String> getTerms(String fld) throws Exception {
    Vector<String> foundTerms = new Vector<String>();
    IndexAndTaxonomy indexAndTaxonomy = getNewIndexReader(null);
    try {
        @SuppressWarnings("resource")
        AtomicReader reader = SlowCompositeReaderWrapper.wrap(indexAndTaxonomy.indexReader);
        // In Lucene 4, Terms returned by reader.terms(fld) is already restricted
        // to that single field, so every enumerated term belongs to 'fld'.
        Terms terms = reader.terms(fld);
        if (terms != null) {
            TermsEnum enu = terms.iterator(null);
            BytesRef term = enu.next();
            while (term != null) {
                // Bug fix: the old Lucene-3-style guard compared the term VALUE
                // with the field NAME and broke out of the loop, so terms were
                // only returned if the first value happened to equal the field
                // name. The check is unnecessary with per-field Terms and has
                // been removed.
                foundTerms.add(term.utf8ToString());
                term = enu.next();
            }
        }
        return foundTerms;
    } finally {
        releaseIndexReader(indexAndTaxonomy);
    }
}
/**
 * Browses the index for the specified Lucene field and returns the list of terms found containing the search value
 * with their frequency.
 *
 * @param fieldName The Lucene field name
 * @param searchValue The value to search for. Could be "". Trailing "*" (without a
 *                    leading "*") switches to prefix-only matching.
 * @param maxNumberOfTerms Max number of term's values to look in the index. For large catalogue
 * this value should be increased in order to get better results. If this
 * value is too high, then looking for terms could take more time. The use
 * of a good analyzer should allow to reduce the number of useless values like
 * (a, the, ...).
 * @param threshold Minimum frequency (document count) for a term to be returned.
 * @param context service context used to determine the analyzer language
 * @return An unsorted and unordered list of terms with their frequency.
 * @throws Exception if the index reader cannot be acquired or read
 */
public Collection<TermFrequency> getTermsFequency(String fieldName, String searchValue, int maxNumberOfTerms,
        int threshold, ServiceContext context) throws Exception {
    Collection<TermFrequency> termList = new ArrayList<TermFrequency>();
    IndexAndTaxonomy indexAndTaxonomy = getNewIndexReader(null);
    // Wildcards are stripped: matching below is done with plain
    // startsWith/contains string comparisons, not wildcard queries.
    String searchValueWithoutWildcard = searchValue.replaceAll("[*?]", "");
    final Element request = new Element("request").addContent(new Element("any").setText(searchValue));
    // Analyze the search value with the same per-field analyzer used at index
    // time, so comparison happens on normalized text.
    String language = LuceneSearcher.determineLanguage(context, request, _settingInfo).analyzerLanguage;
    final PerFieldAnalyzerWrapper analyzer = SearchManager.getAnalyzer(language, true);
    String analyzedSearchValue = LuceneSearcher.analyzeText(fieldName, searchValueWithoutWildcard, analyzer);
    // "abc*" (but not "*abc*") means prefix-only matching.
    boolean startsWithOnly = !searchValue.startsWith("*") && searchValue.endsWith("*");
    try {
        GeonetworkMultiReader multiReader = indexAndTaxonomy.indexReader;
        // Iterate per index segment; frequencies are therefore per-segment counts.
        for (AtomicReaderContext atomicReaderContext : multiReader.getContext().leaves()) {
            final AtomicReader reader = atomicReaderContext.reader();
            Terms terms = reader.terms(fieldName);
            if (terms != null) {
                TermsEnum termEnum = terms.iterator(null);
                int i = 1;
                BytesRef term = termEnum.next();
                // Examine at most maxNumberOfTerms terms.
                while (term != null && i++ < maxNumberOfTerms) {
                    String text = term.utf8ToString();
                    // Only consider terms frequent enough in this reader.
                    if (termEnum.docFreq() >= threshold) {
                        String analyzedText = LuceneSearcher.analyzeText(fieldName, text, analyzer);
                        // Accept a term when either its analyzed form matches the
                        // analyzed search value, or its raw text matches the raw
                        // (wildcard-stripped) search value.
                        if ((startsWithOnly && StringUtils.startsWithIgnoreCase(analyzedText, analyzedSearchValue))
                                || (!startsWithOnly && StringUtils.containsIgnoreCase(analyzedText, analyzedSearchValue))
                                || (startsWithOnly && StringUtils.startsWithIgnoreCase(text, searchValueWithoutWildcard))
                                || (!startsWithOnly && StringUtils.containsIgnoreCase(text, searchValueWithoutWildcard))) {
                            TermFrequency freq = new TermFrequency(text, termEnum.docFreq());
                            termList.add(freq);
                        }
                    }
                    term = termEnum.next();
                }
            }
        }
    } finally {
        releaseIndexReader(indexAndTaxonomy);
    }
    return termList;
}
/**
 * A term and the number of documents it occurs in.
 *
 * Ordering and equality are based on the term only; the frequency is ignored
 * by {@link #compareTo}, {@link #equals} and {@link #hashCode}.
 */
public static class TermFrequency implements Comparable<Object> {
    private String term;
    private int frequency;

    public TermFrequency(String term, int frequency) {
        this.term = term;
        this.frequency = frequency;
    }

    public String getTerm() {
        return this.term;
    }

    public int getFrequency() {
        return this.frequency;
    }

    public void setFrequency(int frequency) {
        this.frequency = frequency;
    }

    /** Orders by term; any non-TermFrequency argument compares as equal (0). */
    public int compareTo(Object o) {
        if (o instanceof TermFrequency) {
            TermFrequency oFreq = (TermFrequency) o;
            return term.compareTo(oFreq.term);
        } else {
            return 0;
        }
    }

    @Override
    public int hashCode() {
        // Bug fix: equals() compares only the term (via compareTo), so the
        // frequency must not contribute to the hash code — previously two
        // equal objects with different frequencies had different hash codes,
        // violating the equals/hashCode contract.
        final int prime = 31;
        return prime + ((term == null) ? 0 : term.hashCode());
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if (obj == null)
            return false;
        if (getClass() != obj.getClass())
            return false;
        TermFrequency other = (TermFrequency) obj;
        return compareTo(other) == 0;
    }
}
// utilities
/**
 * Transforms a metadata record into its index document(s) by running the
 * indexing stylesheets. If a transformation fails, the record is still indexed
 * but only with an indexing-error flag/message plus its raw text, so the
 * failure remains visible in search results.
 *
 * @param xml the metadata record to index
 * @param defaultLangStyleSheet stylesheet producing the default-language document
 * @param otherLocalesStyleSheet stylesheet producing per-locale documents (applied only if the file exists)
 * @return a "Documents" element holding one "Document" per language
 * @throws Exception declared for API compatibility; transformation errors are
 *                   caught and converted into indexing-error fields instead
 */
Element getIndexFields(Element xml,
                       File defaultLangStyleSheet,
                       File otherLocalesStyleSheet) throws Exception {
    Element allDocuments = new Element("Documents");
    try {
        Map<String, Object> styleSheetParams = new HashMap<String, Object>();
        styleSheetParams.put("inspire", Boolean.toString(isInspireEnabled()));
        styleSheetParams.put("thesauriDir", _geonetworkDataDirectory.getThesauriDir().getAbsolutePath());

        Element defaultLangDocument = Xml.transform(xml, defaultLangStyleSheet.getAbsolutePath(), styleSheetParams);

        // Optional per-locale documents: merge duplicates of the default locale
        // back into the default-language document.
        if (otherLocalesStyleSheet.exists()) {
            @SuppressWarnings(value = "unchecked")
            List<Element> otherLanguages =
                    Xml.transform(xml, otherLocalesStyleSheet.getAbsolutePath(), styleSheetParams).removeContent();
            mergeDefaultLang(defaultLangDocument, otherLanguages);
            allDocuments.addContent(otherLanguages);
        }
        allDocuments.addContent(defaultLangDocument);
    }
    catch (Exception e) {
        Log.error(Geonet.INDEX_ENGINE, "Indexing stylesheet contains errors : " + e.getMessage() + "\n\t Marking the metadata as _indexingError=1 in index");
        // Fallback document: error flag + message + the record's raw text.
        Element errorDocument = new Element("Document");
        SearchManager.addField(errorDocument, INDEXING_ERROR_FIELD, "1", true, true);
        SearchManager.addField(errorDocument, INDEXING_ERROR_MSG, "GNIDX-XSL||" + e.getMessage(), true, false);
        StringBuilder textAccumulator = new StringBuilder();
        allText(xml, textAccumulator);
        SearchManager.addField(errorDocument, "any", textAccumulator.toString(), false, true);
        allDocuments.addContent(errorDocument);
    }
    return allDocuments;
}
/** @return whether INSPIRE support is enabled in the system settings (passed to the indexing stylesheets). */
private boolean isInspireEnabled() {
    return _settingInfo.getInspireEnabled();
}
// multilingual document merge utilities
/**
 * If otherLanguages has a document that is the same locale as the default then remove it from otherLanguages and
 * merge the fields with those in defaultLang.
 *
 * @param defaultLang the default-language "Document" element; receives the merged, de-duplicated fields in place
 * @param otherLanguages the per-locale "Document" elements; the one matching the default locale is removed
 */
@SuppressWarnings(value = "unchecked")
private void mergeDefaultLang( Element defaultLang, List<Element> otherLanguages ) {
    // Locale of the default-language document ("" when the attribute is absent).
    final String langCode;
    if (defaultLang.getAttribute("locale") == null) {
        langCode = "";
    }
    else {
        langCode = defaultLang.getAttributeValue("locale");
    }
    // Find the other-language document with the same locale, if any.
    Element toMerge = null;
    for( Element element : otherLanguages ) {
        String clangCode;
        if (element.getAttribute("locale") == null) {
            clangCode = "";
        }
        else {
            clangCode = element.getAttributeValue("locale");
        }
        if (clangCode.equals(langCode)) {
            toMerge = element;
            break;
        }
    }
    // Sorted set used to de-duplicate fields during the merge: two Field
    // elements are duplicates when their name, string, store and index
    // attributes are all equal (missing attributes compare as "").
    SortedSet<Element> toInclude = new TreeSet<Element>(new Comparator<Element>(){
        public int compare( Element o1, Element o2 ) {
            // <Field name="_locale" string="{string($iso3LangId)}" store="true" index="true" token="false"/>
            int name = compare(o1, o2, "name");
            int string = compare(o1, o2, "string");
            int store = compare(o1, o2, "store");
            int index = compare(o1, o2, "index");
            if (name != 0) {
                return name;
            }
            if (string != 0) {
                return string;
            }
            if (store != 0) {
                return store;
            }
            if (index != 0) {
                return index;
            }
            return 0;
        }
        // Compares a single attribute of the two Field elements.
        private int compare( Element o1, Element o2, String attName ) {
            return safeGet(o1, attName).compareTo(safeGet(o2, attName));
        }
        // Returns the attribute value, or "" when the attribute is missing.
        public String safeGet( Element e, String attName ) {
            String att = e.getAttributeValue(attName);
            if (att == null) {
                return "";
            } else {
                return att;
            }
        }
    });
    if (toMerge != null) {
        // Merge: take the union of both documents' fields (duplicates removed
        // by the comparator above) and put it back into defaultLang.
        toMerge.detach();
        otherLanguages.remove(toMerge);
        for( Element element : (List<Element>) defaultLang.getChildren() ) {
            toInclude.add(element);
        }
        for( Element element : (List<Element>) toMerge.getChildren() ) {
            toInclude.add(element);
        }
        toMerge.removeContent();
        defaultLang.removeContent();
        defaultLang.addContent(toInclude);
    }
}
/**
 * Runs a search stylesheet (looked up in the configured stylesheets directory)
 * against the given XML and returns the transformed result.
 *
 * @param styleSheetName the stylesheet file name, resolved against _stylesheetsDir
 * @param xml the element to transform
 * @return the transformation result
 * @throws Exception if the stylesheet fails; the error is logged before rethrowing
 */
Element transform(String styleSheetName, Element xml) throws Exception {
    try {
        File styleSheet = new File(_stylesheetsDir, styleSheetName);
        return Xml.transform(xml, styleSheet.getAbsolutePath());
    } catch (Exception e) {
        Log.error(Geonet.INDEX_ENGINE, "Search stylesheet contains errors : " + e.getMessage());
        throw e;
    }
}
/**
 * Acquires an index reader / taxonomy pair from the tracker.
 *
 * @param preferedLang preferred language, passed through to the tracker (may be null)
 * @param versionToken the index version the caller already holds; -1 requests the latest (see {@link #getNewIndexReader})
 * @return the acquired reader pair; release it with {@link #releaseIndexReader}
 * @throws IOException if the index cannot be opened
 */
public IndexAndTaxonomy getIndexReader(String preferedLang, long versionToken) throws IOException {
    return _tracker.acquire(preferedLang, versionToken);
}
/**
 * Acquires a fresh (latest-version) index reader by requesting version token -1.
 *
 * @param preferedLang preferred language, passed through to the tracker (may be null)
 * @return the acquired reader pair; release it with {@link #releaseIndexReader}
 * @throws IOException if the index cannot be opened
 * @throws InterruptedException declared for API compatibility
 */
public IndexAndTaxonomy getNewIndexReader(String preferedLang) throws IOException, InterruptedException {
    // Guard the debug log like everywhere else in this class so the message is
    // only built/emitted when debug logging is enabled.
    if (Log.isDebugEnabled(Geonet.INDEX_ENGINE)) {
        Log.debug(Geonet.INDEX_ENGINE, "Ask for new reader");
    }
    return getIndexReader(preferedLang, -1L);
}
/**
 * Releases a reader previously acquired via {@link #getIndexReader} or
 * {@link #getNewIndexReader} back to the NRT manager.
 *
 * @param reader the reader pair to release
 * @throws InterruptedException declared for API compatibility
 * @throws IOException if releasing the reader fails
 */
public void releaseIndexReader(IndexAndTaxonomy reader) throws InterruptedException, IOException {
    reader.indexReader.releaseToNRTManager();
}
/**
 * Creates an index in directory luceneDir if not already there.
 *
 * @param rebuild when true the index is wiped and reopened even if it is healthy
 * @throws Exception if resetting or reopening the index fails
 */
private void setupIndex(boolean rebuild) throws Exception {
    boolean badIndex = false;
    // Probe index health by acquiring and immediately releasing a reader;
    // any failure marks the index as bad and forces a rebuild below.
    try {
        IndexAndTaxonomy reader = _tracker.acquire(null, -1);
        reader.indexReader.releaseToNRTManager();
    } catch (Throwable e) {
        badIndex = true;
        Log.error(Geonet.INDEX_ENGINE,
                "Exception while opening lucene index, going to rebuild it: " , e);
    }
    // if rebuild forced or bad index then rebuild index
    if (rebuild || badIndex) {
        Log.error(Geonet.INDEX_ENGINE, "Rebuilding lucene index");
        // Give current index users up to 5 minutes before resetting.
        _tracker.reset(TimeUnit.MINUTES.toMillis(5));
        if (_spatial != null){
            try {
                // Keep the spatial index in sync: reset it alongside the text index.
                _spatial.writer().reset();
            } catch (Exception e) {
                Log.error(Geonet.INDEX_ENGINE, "Failure resetting spatial index.", e);
            }
        }
        _tracker.open(Geonet.DEFAULT_LANGUAGE);
    }
}
/**
 * Rebuilds the Lucene index. If xlink or from selection parameters
 * are defined, reindex a subset of records. Otherwise reindex all records.
 *
 * @param context service context used to look up the data manager
 * @param xlinks Search all docs with XLinks, clear the XLinks cache and index all records found.
 * @param reset when true, reset the index (via setupIndex) before reindexing
 * @param fromSelection Reindex all records from selection.
 * @return true on success, false when any error occurred (the error is logged)
 * @throws Exception never thrown in practice — all errors are caught, logged and reported as false
 */
public boolean rebuildIndex(ServiceContext context,
                            boolean xlinks,
                            boolean reset,
                            boolean fromSelection) throws Exception {
    GeonetContext gc = (GeonetContext) context.getHandlerContext(Geonet.CONTEXT_NAME);
    DataManager dataMan = gc.getBean(DataManager.class);
    try {
        if (reset) {
            synchronized (_tracker) {
                setupIndex(false);
            }
        }
        if (fromSelection) {
            dataMan.rebuildIndexForSelection(context, xlinks);
        } else if (xlinks) {
            dataMan.rebuildIndexXLinkedMetadata(context);
        } else {
            // Full rebuild: wipe the index, then reindex everything.
            synchronized (_tracker) {
                setupIndex(true);
            }
            dataMan.init(context, true);
        }
        return true;
    }
    catch (Exception e) {
        // Fixed misleading log message: this method IS the rebuild; on failure
        // it returns false and nothing further is rebuilt.
        Log.error(Geonet.INDEX_ENGINE, "Exception while rebuilding lucene index: " +
                e.getMessage(), e);
        return false;
    }
}
/**
 * Creates a new {@link Document} for each input field in xml taking {@link LuceneConfig} and field's attributes
 * for configuration.
 *
 * @param language the language this index document belongs to (carried into the returned IndexInformation)
 * @param xml the list of "Field" elements to be indexed
 * @param multilingualSortFields pre-built sort fields added verbatim to the document
 * @return the language, the built Lucene document and the facet categories
 */
private IndexInformation newDocument(String language, Element xml, Collection<Field> multilingualSortFields)
{
    Document doc = new Document();
    // NOTE(review): nothing in this method adds to 'categories' — facet values
    // are added directly to the document as FacetField below. Confirm that
    // returning an always-empty collection is intended.
    Collection<CategoryPath> categories = new HashSet<CategoryPath>();
    for (Field field : multilingualSortFields) {
        doc.add(field);
    }
    // Type for flag fields that must be searchable as a single token and stored.
    final FieldType storeNotTokenizedFieldType = new FieldType();
    storeNotTokenizedFieldType.setIndexed(true);
    storeNotTokenizedFieldType.setTokenized(false);
    storeNotTokenizedFieldType.setStored(true);
    // Type for fields that are only stored, never searched (e.g. error messages;
    // see getIndexFields, which adds INDEXING_ERROR_MSG with store=true/index=false).
    // Bug fix: this was setIndexed(true), which contradicted the name and made
    // the type identical to storeNotTokenizedFieldType.
    final FieldType storeNotIndexedFieldType = new FieldType();
    storeNotIndexedFieldType.setIndexed(false);
    storeNotIndexedFieldType.setTokenized(false);
    storeNotIndexedFieldType.setStored(true);
    float documentBoost = 1;
    // Set boost to promote some types of document selectively according to DocumentBoosting class
    if (_documentBoostClass != null) {
        Float f = (_documentBoostClass).getBoost(xml);
        if (f != null) {
            if (Log.isDebugEnabled(Geonet.INDEX_ENGINE))
                Log.debug(Geonet.INDEX_ENGINE, "Boosting document with boost factor: " + f);
            documentBoost = f;
        }
    }
    boolean hasLocaleField = false;
    for (Object o : xml.getChildren()) {
        Element field = (Element) o;
        String name = field.getAttributeValue(LuceneFieldAttribute.NAME.toString());
        String string = field.getAttributeValue(LuceneFieldAttribute.STRING.toString()); // Lower case field is handled by Lucene Analyzer.
        if (name.equals(Geonet.LUCENE_LOCALE_KEY)) hasLocaleField = true;
        if (string.trim().length() > 0) {
            String sStore = field.getAttributeValue(LuceneFieldAttribute.STORE.toString());
            String sIndex = field.getAttributeValue(LuceneFieldAttribute.INDEX.toString());
            boolean bStore = sStore != null && sStore.equals("true");
            boolean bIndex = sIndex != null && sIndex.equals("true");
            boolean token = _luceneConfig.isTokenizedField(name);
            boolean isNumeric = _luceneConfig.isNumericField(name);
            boolean isFacetField = _luceneConfig.isFacetField(name);
            FieldType fieldType = new FieldType();
            fieldType.setStored(bStore);
            fieldType.setIndexed(bIndex);
            fieldType.setTokenized(token);
            Field f;
            Field fForFacet = null;
            if (isNumeric) {
                try {
                    f = addNumericField(name, string, fieldType);
                } catch (Exception e) {
                    String msg = "Invalid value. Field '" + name + "' is not added to the document. Error is: " + e.getMessage();
                    // Flag the record so documents with bad numeric values can be found.
                    Field idxError = new Field(INDEXING_ERROR_FIELD, "1", storeNotTokenizedFieldType);
                    Field idxMsg = new Field(INDEXING_ERROR_MSG, "GNIDX-BADNUMVALUE|" + name + "|" + e.getMessage(), storeNotIndexedFieldType);
                    doc.add(idxError);
                    doc.add(idxMsg);
                    Log.warning(Geonet.INDEX_ENGINE, msg);
                    // If an exception occur, the field is not added to the document
                    // and to the taxonomy
                    continue;
                }
            } else {
                // TODO: Can we use the same field for facet and search ?
                if (isFacetField) {
                    fForFacet = new FacetField(name + FACET_FIELD_SUFFIX, string);
                    if (Log.isDebugEnabled(Geonet.INDEX_ENGINE)) {
                        Log.debug(Geonet.INDEX_ENGINE, "Facet field: " + fForFacet.toString());
                    }
                }
                f = new Field(name, string, fieldType);
            }
            // As of lucene 4.0 to boost a document all field boosts must be premultiplied by documentBoost
            // because there is no doc.setBoost method anymore.
            // Boost a particular field according to Lucene config.
            // You cannot set an index-time boost on an unindexed field, or one that omits norms
            if (bIndex && !f.fieldType().omitNorms()) {
                Float boost = _luceneConfig.getFieldBoost(name);
                if (boost != null) {
                    if (Log.isDebugEnabled(Geonet.INDEX_ENGINE))
                        Log.debug(Geonet.INDEX_ENGINE, "Boosting field: " + name + " with boost factor: " + boost + " x " + documentBoost);
                    f.setBoost(documentBoost * boost);
                } else if (documentBoost != 1) {
                    f.setBoost(documentBoost);
                }
            }
            if (fForFacet != null) {
                doc.add(fForFacet);
            }
            doc.add(f);
        }
    }
    // Every document must carry a locale; default it when the stylesheet emitted none.
    if (!hasLocaleField) {
        doc.add(new Field(Geonet.LUCENE_LOCALE_KEY, Geonet.DEFAULT_LANGUAGE, storeNotTokenizedFieldType));
    }
    return new IndexInformation(language, doc, categories);
}
/**
 * Creates Lucene numeric field.
 *
 * @param name The field name
 * @param string The value to be indexed. It is parsed to the numeric type
 *               configured for the field (int when the type is unrecognized).
 *               If parsing fails the field must not be added to the index.
 * @param fieldType the (mutable) field type; its numeric type is set here as a side effect
 * @return the numeric field
 * @throws Exception if the value cannot be parsed (logged as a warning, then rethrown)
 */
private Field addNumericField(String name, String string, FieldType fieldType) throws Exception {
    LuceneConfigNumericField fieldConfig = _luceneConfig.getNumericField(name);
    // TODO : reuse the numeric field for better performance
    if (Log.isDebugEnabled(Geonet.INDEX_ENGINE))
        Log.debug(Geonet.INDEX_ENGINE, "Indexing numeric field: " + name + " with value: " + string);
    try {
        String paramType = fieldConfig.getType();
        // Use the parseXxx primitives instead of valueOf to avoid pointless autoboxing.
        if ("double".equals(paramType)) {
            fieldType.setNumericType(NumericType.DOUBLE);
            return new DoubleField(name, Double.parseDouble(string), fieldType);
        }
        if ("float".equals(paramType)) {
            fieldType.setNumericType(NumericType.FLOAT);
            return new FloatField(name, Float.parseFloat(string), fieldType);
        }
        if ("long".equals(paramType)) {
            fieldType.setNumericType(NumericType.LONG);
            return new LongField(name, Long.parseLong(string), fieldType);
        }
        // Any other configured type (including "int") is indexed as an int.
        fieldType.setNumericType(NumericType.INT);
        return new IntField(name, Integer.parseInt(string), fieldType);
    }
    catch (Exception e) {
        Log.warning(Geonet.INDEX_ENGINE, "Failed to index numeric field: " + name + " with value: " + string + ", error is: " + e.getMessage());
        throw e;
    }
}
/** @return the helper managing the spatial (geometry) index. */
public Spatial getSpatial() {
    return _spatial;
}
/**
 * Manages the spatial index: a GeoTools datastore holding the geometries of
 * indexed records, used to build and evaluate spatial search filters. All
 * access to the underlying writer is serialized through {@code _lock}.
 */
public class Spatial {
    private final DataStore _datastore;
    // Delay before a scheduled Committer flushes pending spatial writes.
    private static final long TIME_BETWEEN_SPATIAL_COMMITS_IN_SECONDS = 10;
    // Maps a spatial relation keyword (see Geonet.SearchResult.Relation) to the
    // constructor of the SpatialFilter implementation evaluating that relation.
    private final Map<String, Constructor<? extends SpatialFilter>> _types;
    {
        Map<String, Constructor<? extends SpatialFilter>> types = new HashMap<String, Constructor<? extends SpatialFilter>>();
        try {
            types.put(Geonet.SearchResult.Relation.ENCLOSES, constructor(ContainsFilter.class));
            types.put(Geonet.SearchResult.Relation.CROSSES, constructor(CrossesFilter.class));
            types.put(Geonet.SearchResult.Relation.OUTSIDEOF, constructor(IsFullyOutsideOfFilter.class));
            types.put(Geonet.SearchResult.Relation.EQUAL, constructor(EqualsFilter.class));
            types.put(Geonet.SearchResult.Relation.INTERSECTION, constructor(IntersectionFilter.class));
            types.put(Geonet.SearchResult.Relation.OVERLAPS, constructor(OverlapsFilter.class));
            types.put(Geonet.SearchResult.Relation.TOUCHES, constructor(TouchesFilter.class));
            types.put(Geonet.SearchResult.Relation.WITHIN, constructor(WithinFilter.class));
            // types.put(Geonet.SearchResult.Relation.CONTAINS, constructor(BeyondFilter.class));
            // types.put(Geonet.SearchResult.Relation.CONTAINS, constructor(DWithinFilter.class));
        }
        catch (Exception e) {
            e.printStackTrace();
            throw new RuntimeException("Unable to create types mapping", e);
        }
        _types = Collections.unmodifiableMap(types);
    }
    private final Transaction _transaction;
    private final int _maxWritesInTransaction;
    private final Parser _gmlParser;
    private final Lock _lock;
    private SpatialIndexWriter _writer;
    // Pending deferred-commit task; volatile because it is also read/replaced
    // from the timer thread in Committer.run().
    private volatile Committer _committerTask;
    /**
     * Creates the spatial index manager on top of the given datastore and
     * rebuilds the spatial index when the writer reports it is missing/broken.
     *
     * @param dataStore the GeoTools datastore backing the spatial index
     * @param maxWritesInTransaction - Number of features to write
     * to before commit - set 1 and the transaction will be
     * autocommit which results in faster loading for some (all?)
     * configurations and does not keep a long running transaction
     * open.
     * @throws Exception if the writer cannot be created or the index rebuilt
     */
    public Spatial(DataStore dataStore, int maxWritesInTransaction) throws Exception {
        _lock = new ReentrantLock();
        _datastore = dataStore;
        if (maxWritesInTransaction > 1) {
            _transaction = new DefaultTransaction("SpatialIndexWriter");
        }
        else {
            // Single-write mode: autocommit, no long-running transaction.
            _transaction = Transaction.AUTO_COMMIT;
        }
        _maxWritesInTransaction = maxWritesInTransaction;
        _gmlParser = new Parser(new GMLConfiguration());
        boolean rebuildIndex;
        rebuildIndex = createWriter(_datastore);
        if (rebuildIndex) {
            setupIndex(true);
        }
        else{
            // since the index is considered good we will
            // call getIndex to make sure the in-memory index is
            // generated
            _writer.getIndex();
        }
    }
    /**
     * Creates the SpatialIndexWriter and determines whether a rebuild is needed.
     *
     * @param datastore the datastore to write to
     * @return true when the spatial index must be rebuilt (no schema yet, or
     *         writer creation failed and the writer was reset)
     * @throws IOException declared for the writer; a failure before any writer
     *         exists is wrapped in a RuntimeException instead
     */
    private boolean createWriter(DataStore datastore) throws IOException {
        boolean rebuildIndex;
        try {
            _writer = new SpatialIndexWriter(datastore, _gmlParser,_transaction, _maxWritesInTransaction, _lock);
            // A missing schema means the backing feature store was never initialized.
            rebuildIndex = _writer.getFeatureSource().getSchema() == null;
        }
        catch (Throwable e) {
            if (_writer == null) {
                throw new RuntimeException(e);
            }
            String exceptionString = Xml.getString(JeevesException.toElement(e));
            Log.warning(Geonet.SPATIAL, "Failure to make _writer, maybe a problem but might also not be an issue:" +
                    exceptionString);
            try {
                // Best effort: reset the half-constructed writer before forcing a rebuild.
                _writer.reset();
            }
            catch (Exception e1) {
                Log.error(Geonet.SPATIAL, "Unable to call reset on Spatial writer: "+e1.getMessage());
                e1.printStackTrace();
            }
            rebuildIndex = true;
        }
        return rebuildIndex;
    }
    /**
     * Closes spatial index.
     */
    public void end() {
        _lock.lock();
        try {
            _writer.close();
        }
        catch (IOException e) {
            Log.error(Geonet.SPATIAL,"error closing spatial index: "+e.getMessage());
            e.printStackTrace();
        }
        finally {
            _lock.unlock();
        }
    }
    /**
     * Builds a spatial filter from an OGC filter expression.
     *
     * @param query the Lucene query the spatial filter is combined with
     * @param numHits maximum number of hits to consider
     * @param filterExpr the OGC filter expression as XML
     * @param filterVersion the OGC filter specification version (1.0.0 / 1.1.0 / 2.0.0)
     * @return the filter evaluating the expression against the spatial index
     * @throws Exception parse failures are rethrown as IllegalArgumentException
     *         (note: the original cause is not chained, only its toString())
     */
    public Filter filter(org.apache.lucene.search.Query query, int numHits, Element filterExpr, String filterVersion)
            throws Exception {
        _lock.lock();
        try {
            Parser filterParser = getFilterParser(filterVersion);
            Pair<FeatureSource<SimpleFeatureType, SimpleFeature>, SpatialIndex> accessor = new SpatialIndexAccessor();
            return OgcGenericFilters.create(query, numHits, filterExpr, accessor, filterParser);
        }
        catch (Exception e) {
            // TODO Handle NPE creating spatial filter (due to constraint language version).
            throw new IllegalArgumentException("Error when parsing spatial filter (version: " + filterVersion + "):" +
                    Xml.getString(filterExpr) + ". Error is: " + e.toString());
        }
        finally {
            _lock.unlock();
        }
    }
    /**
     * Builds a spatial filter matching the given geometries using the relation
     * requested in the search request (defaults to intersection). Multiple
     * geometries are combined with OR.
     *
     * @param query the Lucene query the spatial filter is combined with
     * @param numHits maximum number of hits to consider
     * @param geom the geometries to match against
     * @param request the search request carrying the "relation" parameter
     * @return the spatial filter
     * @throws Exception if the filter cannot be instantiated reflectively
     */
    public SpatialFilter filter(org.apache.lucene.search.Query query, int numHits,
                                Collection<Geometry> geom, Element request) throws Exception {
        _lock.lock();
        try {
            String relation = Util.getParam(request, Geonet.SearchResult.RELATION,
                    Geonet.SearchResult.Relation.INTERSECTION);
            if(geom.size() == 1) {
                return _types.get(relation.toLowerCase()).newInstance(query, numHits, geom.iterator().next(), new SpatialIndexAccessor());
            } else {
                // NOTE(review): the single-geometry branch lower-cases the relation
                // before the map lookup but this branch does not — confirm whether
                // mixed-case relation values should work for multiple geometries too.
                Collection<SpatialFilter> filters = new ArrayList<SpatialFilter>(geom.size());
                Envelope bounds = null;
                // Track the union of all geometry envelopes for the OR filter.
                for (Geometry geometry : geom) {
                    if(bounds == null) {
                        bounds = geometry.getEnvelopeInternal();
                    } else {
                        bounds.expandToInclude(geometry.getEnvelopeInternal());
                    }
                    filters.add(_types.get(relation).newInstance(query, numHits, geometry, new SpatialIndexAccessor()));
                }
                return new OrSpatialFilter(query, numHits, bounds, new SpatialIndexAccessor(), filters);
            }
        }
        finally {
            _lock.unlock();
        }
    }
    /**
     * Returns the spatial index writer and (re)schedules the deferred commit
     * task so bursts of writes are flushed together after
     * {@link #TIME_BETWEEN_SPATIAL_COMMITS_IN_SECONDS}.
     *
     * @return the spatial index writer (created lazily)
     * @throws Exception if the writer cannot be created
     */
    public SpatialIndexWriter writer() throws Exception {
        _lock.lock();
        try {
            // Cancel any pending commit and push the deadline back.
            if (_committerTask != null) {
                _committerTask.cancel();
            }
            _committerTask = new Committer();
            timer.schedule(_committerTask, TIME_BETWEEN_SPATIAL_COMMITS_IN_SECONDS, TimeUnit.SECONDS);
            return writerNoLocking();
        }
        finally {
            _lock.unlock();
        }
    }
    /**
     * Lazily creates the writer. Callers must already hold {@code _lock}.
     *
     * @return the spatial index writer
     * @throws Exception if the writer cannot be created
     */
    private SpatialIndexWriter writerNoLocking() throws Exception {
        if (_writer == null) {
            _writer = new SpatialIndexWriter(_datastore, _gmlParser, _transaction, _maxWritesInTransaction, _lock);
        }
        return _writer;
    }
    /**
     * Returns a parser for the requested OGC filter specification version.
     *
     * @param filterVersion one of FilterCapabilities.VERSION_100/110/200
     * @return the matching GeoTools parser
     * @throws IllegalArgumentException for any other version string
     */
    private Parser getFilterParser(String filterVersion) {
        Configuration config;
        if (filterVersion.equals(FilterCapabilities.VERSION_100)) {
            config = FILTER_1_0_0;
        } else if (filterVersion.equals(FilterCapabilities.VERSION_200)) {
            config = FILTER_2_0_0;
        } else if (filterVersion.equals(FilterCapabilities.VERSION_110)) {
            config = FILTER_1_1_0;
        } else {
            throw new IllegalArgumentException("UnsupportFilterVersion: "+filterVersion);
        }
        return new Parser(config);
    }
    /**
     * Lazy accessor pair handed to spatial filters: exposes the writer's
     * feature source and its in-memory spatial index on demand.
     */
    private final class SpatialIndexAccessor
            extends
            Pair<FeatureSource<SimpleFeatureType, SimpleFeature>, SpatialIndex> {
        @Override
        public FeatureSource<SimpleFeatureType, SimpleFeature> one() {
            return _writer.getFeatureSource();
        }
        @Override
        public SpatialIndex two() {
            try {
                return _writer.getIndex();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }
    /**
     * Deferred-commit task scheduled by {@link #writer()}: commits pending
     * spatial writes unless it has been cancelled or superseded by a newer task.
     */
    private class Committer implements Runnable {
        private AtomicBoolean cancelled = new AtomicBoolean(false);
        @Override
        public void run() {
            if (cancelled.get()) {
                return;
            }
            _lock.lock();
            try {
                // Only the most recently scheduled task may commit.
                if (_committerTask == this) {
                    _writer.commit();
                    _committerTask = null;
                }
            }
            catch (IOException e) {
                Log.error(Geonet.SPATIAL, "error writing spatial index "+e.getMessage());
            }
            finally {
                _lock.unlock();
            }
        }
        public void cancel() {
            this.cancelled.set(true);
        }
    }
}
/**
 * Looks up the canonical constructor used to reflectively instantiate a
 * spatial filter: (Query, int numHits, Geometry, Pair&lt;FeatureSource, SpatialIndex&gt;).
 *
 * @param clazz the SpatialFilter subclass
 * @return the matching public constructor
 * @throws SecurityException if reflective access is denied
 * @throws NoSuchMethodException if the class does not declare that constructor
 */
private static Constructor<? extends SpatialFilter> constructor(Class<? extends SpatialFilter> clazz)
        throws SecurityException, NoSuchMethodException {
    return clazz.getConstructor(org.apache.lucene.search.Query.class, int.class, Geometry.class, Pair.class);
}
/** Package-private accessor for the tracker managing the per-language Lucene indexes. */
LuceneIndexLanguageTracker getIndexTracker() {
    return _tracker;
}
/**
 * Optimizes the Lucene index via the tracker.
 *
 * @return true on success, false when optimization failed (the error is logged)
 */
public boolean optimizeIndex() {
    try {
        _tracker.optimize();
        return true;
    } catch (Throwable e) {
        // Pass the throwable to the logger so the stack trace is preserved,
        // consistent with the other Log.error(module, msg, e) calls in this class.
        Log.error(Geonet.INDEX_ENGINE, "Exception while optimizing lucene index: " + e.getMessage(), e);
        return false;
    }
}
}