// Package com.flaptor.indextank.index
//
// Source Code of com.flaptor.indextank.index.IndexEngine

/*
* Copyright (c) 2011 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package com.flaptor.indextank.index;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.json.simple.JSONValue;

import com.flaptor.indextank.BoostingIndexer;
import com.flaptor.indextank.DocumentStoringIndexer;
import com.flaptor.indextank.IndexRecoverer;
import com.flaptor.indextank.LogIndexRecoverer;
import com.flaptor.indextank.blender.Blender;
import com.flaptor.indextank.dealer.Dealer;
import com.flaptor.indextank.index.lsi.LargeScaleIndex;
import com.flaptor.indextank.index.rti.RealTimeIndex;
import com.flaptor.indextank.index.scorer.BoostsScorer;
import com.flaptor.indextank.index.scorer.DynamicDataFacetingManager;
import com.flaptor.indextank.index.scorer.DynamicDataManager;
import com.flaptor.indextank.index.scorer.FacetingManager;
import com.flaptor.indextank.index.scorer.NoFacetingManager;
import com.flaptor.indextank.index.scorer.ScoreFunction;
import com.flaptor.indextank.index.scorer.UserFunctionsManager;
import com.flaptor.indextank.index.storage.InMemoryStorage;
import com.flaptor.indextank.query.IndexEngineParser;
import com.flaptor.indextank.query.analyzers.CompositeAnalyzer;
import com.flaptor.indextank.query.analyzers.FilteringAnalyzer;
import com.flaptor.indextank.rpc.IndexerServer;
import com.flaptor.indextank.rpc.IndexerStatus;
import com.flaptor.indextank.rpc.SearcherServer;
import com.flaptor.indextank.rpc.SuggestorServer;
import com.flaptor.indextank.search.DidYouMeanSearcher;
import com.flaptor.indextank.search.DocumentSearcher;
import com.flaptor.indextank.search.SnippetSearcher;
import com.flaptor.indextank.search.TrafficLimitingSearcher;
import com.flaptor.indextank.storage.alternatives.DocumentStorage;
import com.flaptor.indextank.suggest.DidYouMeanSuggestor;
import com.flaptor.indextank.suggest.NoSuggestor;
import com.flaptor.indextank.suggest.QuerySuggestor;
import com.flaptor.indextank.suggest.Suggestor;
import com.flaptor.indextank.suggest.TermSuggestor;
import com.flaptor.util.Execute;
import com.flaptor.util.FileUtil;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;

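/**
* Wires together the components of a single index (real-time and large-scale
* indexes, scorer, suggestor, optional document storage) and, through
* {@link #main(String[])}, exposes them via the searcher, suggestor and
* indexer RPC servers.
*
* @author Flaptor Team
*/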
public class IndexEngine {
    private static final Logger logger = Logger.getLogger(Execute.whoAmI());
 
    private BoostingIndexer indexer;
    private DocumentSearcher searcher;
    private BoostsScorer scorer;
    private DynamicDataManager boostsManager;
    private LargeScaleIndex lsi;
    private RealTimeIndex rti;
    private Suggestor suggestor;
    private DocumentStorage storage = null;
  private IndexEngineParser parser;
    private UserFunctionsManager functionsManager = null;
    private final BasicPromoter promoter;
    private IndexerStatus status;
    private final String indexCode;
    private final String environment;
    private final int basePort;
    private final File baseDir;
   
    private IndexRecoverer.IndexStorageValue recoveryStorage;
    private String cassandraClusterHosts;

    /*
     * In index configuration:
     * - log_based_storage: true/false
     * - log_server_host
     * - log_server_port
     */
    private boolean logBasedStorage = false;
    private String logServerHost;
    private int logServerPort;

    private static final int DEFAULT_BASE_PORT = 7910;
    private static final int DEFAULT_RTI_SIZE = 1000;
    private static final int DEFAULT_BDB_CACHE = 100;
    private static final int DEFAULT_MAX_SEARCH_QUEUE_LENGTH = 100;

    public static enum SuggestValues { NO, QUERIES, DOCUMENTS};
    public static enum StorageValues { NO, BDB, RAM, CASSANDRA };
   
    public IndexEngine( File baseDir,
                        int basePort,
                        int rtiSize,
                        boolean load,
                        int boostsSize,
                        SuggestValues suggest,
                        StorageValues storageValue,
                        int bdbCache,
                        String functions,
                        boolean facets,
                        String indexCode,
                        String environment ) throws IOException {
       
      this(   baseDir,
              basePort,
              rtiSize,
              load,
              boostsSize,
              suggest,
              storageValue,
              bdbCache,
              functions,
              facets,
              indexCode,
              environment,
              Maps.newHashMap()
           );
    }
   
    public IndexEngine( File baseDir,
                        int basePort,
                        int rtiSize,
                        boolean load,
                        int boostsSize,
                        SuggestValues suggest,
                        StorageValues storageValue,
                        int bdbCache,
                        String functions,
                        boolean facets,
                        String indexCode,
                        String environment,
                        Map<Object, Object> configuration) throws IOException {
       
        Preconditions.checkNotNull(indexCode);
        Preconditions.checkNotNull(environment);
        Preconditions.checkArgument(basePort > 0);
        Preconditions.checkNotNull(baseDir);

        this.indexCode = indexCode;
        this.environment = environment;
        this.basePort = basePort;
        this.baseDir = baseDir;
       
       
      String defaultField = "text";
     
     
      if (configuration.containsKey("log_based_storage")) {
          logBasedStorage = (Boolean)configuration.get("log_based_storage");
          if (logBasedStorage) {
              logServerHost = (String) configuration.get("log_server_host");
             
              logServerPort = ((Long) configuration.get("log_server_port")).intValue();
          }
      }
     
      Map<Object, Object> analyzerConfiguration = (Map<Object, Object>) configuration.get("analyzer_config");

      if (analyzerConfiguration != null) {
      Analyzer analyzer;
     
      if (analyzerConfiguration.containsKey("perField")) {
        Map<Object, Object> perfieldConfiguration = (Map<Object, Object>) analyzerConfiguration.get("perField");
        Map<String, Analyzer> perfieldAnalyzers = Maps.newHashMap();
        for (Entry<Object, Object> entry : perfieldConfiguration.entrySet()) {
          String field = (String) entry.getKey();
          Map<Object, Object> config = (Map<Object, Object>) entry.getValue();
         
          perfieldAnalyzers.put(field, buildAnalyzer(config));
        }
       
        analyzer = new CompositeAnalyzer(buildAnalyzer((Map<Object, Object>) analyzerConfiguration.get("default")), perfieldAnalyzers);
       
      } else {
        analyzer = buildAnalyzer(analyzerConfiguration);
      }
     
      parser = new IndexEngineParser(defaultField, analyzer);
    } else {
      parser = new IndexEngineParser(defaultField);
    }
     
      boostsManager = new DynamicDataManager(boostsSize, baseDir);
       
      scorer = new BoostsScorer(boostsManager, Maps.<Integer, ScoreFunction>newHashMap());
       
        functionsManager = new UserFunctionsManager(scorer);
        boolean someFunctionDefined = false;
        String def0 = "0-A";
        try {
            functionsManager.addFunction(0, def0); // Default timestamp function
        } catch (Exception ex) {
            logger.error("Defining scoring function (spec '"+def0+"')", ex);
        }
        if (null != functions && !"".equals(functions)) {
            String[] specs = functions.split("\\|");
            for (String spec : specs) {
                try {
                    String[] parts = spec.split(":",2);
                    if (parts.length == 2) {
                        int id = Integer.parseInt(parts[0].trim());
                        String def = parts[1].trim();
                        functionsManager.addFunction(id, def);
                        someFunctionDefined = true;
                    } else {
                        logger.error("Function should be defined as <id>:<definition> (found '"+spec+"').");
                    }
                } catch (Exception ex) {
                    logger.error("Defining scoring function (spec '"+spec+"')", ex);
                }
            }
        }

        FacetingManager facetingManager;
       
        if (facets) {
          facetingManager = new DynamicDataFacetingManager(boostsManager);
        } else {
          facetingManager = new NoFacetingManager();
        }
       
        lsi = new LargeScaleIndex(scorer, parser, baseDir, facetingManager);
    rti = new RealTimeIndex(scorer, parser, rtiSize, facetingManager);
        switch (suggest) {
            case NO:
                suggestor = new NoSuggestor();
                break;
            case DOCUMENTS:
                IndexEngineParser suggestorParser = new IndexEngineParser(defaultField);
                suggestor = new TermSuggestor(suggestorParser, baseDir);
                break;
            case QUERIES:
                suggestor = new QuerySuggestor(parser, baseDir);
                break;
        }
       
        this.cassandraClusterHosts = (String) configuration.get("cassandra_cluster_hosts");
        // index recovery configuration
        String recoveryConf = (String) configuration.get("index_recovery");
        if ("cassandra".equals(recoveryConf)) {
            if (this.cassandraClusterHosts == null || this.cassandraClusterHosts.trim().length() == 0)
                throw new IllegalArgumentException("Invalid cassandra servers for index recovery");
            this.recoveryStorage = IndexRecoverer.IndexStorageValue.CASSANDRA;
            logger.info("Index recovery configuration set to recover index from cassandra servers: " + this.cassandraClusterHosts);

        } else {
            logger.info("Index recovery configuration set to recover index from simpleDB");
            this.recoveryStorage = IndexRecoverer.IndexStorageValue.SIMPLEDB;
        }
       
        switch (storageValue) {
            case RAM:
                storage = new InMemoryStorage(baseDir, load);
                logger.info("Using in-memory storage");
                break;
            case NO:
                storage = null;
                logger.info("NOT Using storage");
                break;
        }

        promoter = new BasicPromoter(baseDir, load);
        searcher = new Blender(lsi, rti, suggestor, promoter, boostsManager);
        indexer = new Dealer(lsi, rti, suggestor, boostsManager, rtiSize, promoter, functionsManager);
        status = IndexerStatus.started;

    }

  private Analyzer buildAnalyzer(Map<Object, Object> configuration) {
    Analyzer analyzer;
    String factoryClassString = (String) configuration.get("factory");
    Map<Object, Object> factoryConfig = (Map<Object, Object>) configuration.get("configuration");
   
    try {
      Class<?> factoryClass = Class.forName(factoryClassString);
      Method method = factoryClass.getMethod("buildAnalyzer", new Class[] {Map.class});
     
      analyzer = (Analyzer) method.invoke(null, factoryConfig);
     
      if (factoryConfig.containsKey("filters")) {
        analyzer = new FilteringAnalyzer(analyzer, factoryConfig);
      }
     
    } catch (ClassNotFoundException e) {
      throw new RuntimeException("Analyzer factory class not found", e);
    } catch (SecurityException e) {
      throw new RuntimeException("Analyzer factory class not instantiable", e);
    } catch (NoSuchMethodException e) {
      throw new RuntimeException("Analyzer factory class does not have the required static method buildAnalyzer", e);
    } catch (IllegalArgumentException e) {
      throw new RuntimeException("Analyzer factory class does not have the required static method buildAnalyzer", e);
    } catch (IllegalAccessException e) {
      throw new RuntimeException("Analyzer factory class does not have the required static method buildAnalyzer or it is not accessible", e);
    } catch (InvocationTargetException e) {
      throw new RuntimeException("Analyzer factory class threw an exception for the give configuration", e);
    }
    return analyzer;
  };


    public BoostingIndexer getIndexer(){
        return this.indexer;
    }

    public DocumentSearcher getSearcher(){
        return this.searcher;
    }

    public Suggestor getSuggestor() {
        return this.suggestor;
    }
  
    public DocumentStorage getStorage() {
    return storage;
  }

    public IndexEngineParser getParser() {
    return parser;
  }

    public void setStatus(IndexerStatus status) {
        this.status = status;
    }

    public IndexerStatus getStatus() {
        return status;
    }
   
    public void setIndexer(BoostingIndexer indexer) {
        this.indexer = indexer;
    }

    public synchronized void startFullRecovery() {
        if (getStatus() != IndexerStatus.started && getStatus() != IndexerStatus.error) {
            logger.error("startFullRecovery requested, but I'm in the wrong state (" + getStatus() + ')');
        } else {
            setStatus(IndexerStatus.recovering);
            logger.info("startFullRecovery requested. Creating and starting a recovery thread.");
           
            Runnable recoverer;
            if (logBasedStorage) {
                recoverer = new LogIndexRecoverer(this, indexCode, logServerHost, logServerPort);
            } else {
                recoverer = new IndexRecoverer(this, "127.0.0.1", basePort, baseDir, indexCode, environment, recoveryStorage, cassandraClusterHosts);
                ((IndexRecoverer)recoverer).resetTimestamp();
            }
           
            Thread recovererThread = new Thread(recoverer) {
                                            public void run() {
                                                try {
                                                    super.run();
                                                    setStatus(IndexerStatus.ready);
                                                } catch (Exception e) {
                                                    logger.error("Exception while recovering.", e);
                                                    setStatus(IndexerStatus.error);
                                                }
                                            }
                                        };
            recovererThread.start();
               
        }
    }

    @SuppressWarnings("static-access")
    private static Options getOptions(){
        Option baseDir = OptionBuilder  .withArgName("base-dir")
                                        .hasArg()
                                        .isRequired()
                                        .withDescription("The basint e dir")
                                        .withLongOpt("dir")
                                        .create("d");

        Option basePort = OptionBuilder .withArgName("base-port")
                                        .hasArg()
                                        .withDescription("The base port")
                                        .withLongOpt("port")
                                        .create("p");

        Option boostSize = OptionBuilder .withArgName("boosts-size")
                        .hasArg()
                        .withDescription("Number of available boosts")
                        .withLongOpt("boosts")
                        .create("b");

        Option rtiSize = OptionBuilder .withArgName("rti-size")
                                        .hasArg()
                                        .withDescription("The size limit for the RTI")
                                        .withLongOpt("rti-size")
                                        .create("rs");

        Option help     = OptionBuilder .withDescription("displays this help")
                                        .withLongOpt("help")
                                        .create("h");

        Option snippets = OptionBuilder .withDescription("Allow snippet generation and field fetching.")
                                        .withLongOpt("snippets")
                                        .create("sn");
       
        Option recover = OptionBuilder  .withDescription("Recover documents from the storage.")
                                        .withLongOpt("recover")
                                        .create("r");

        Option indexCode = OptionBuilder.withArgName("code")
                                        .hasArg()
                                        .isRequired()
                                        .withDescription("the index code this indexengine has")
                                        .withLongOpt("index-code")
                                        .create("ic");

        Option environment = OptionBuilder.withArgName("environment")
                        .hasArg()
                        .isRequired()
                        .withDescription("environment prefix")
                        .withLongOpt("environment-prefix")
                        .create("env");
        /*
         * Analyzer argument should receive a JSON string with the following root structure:
         *    - factory: a java type that implements the following static method: org.apache.lucene.analysis.Analyzer buildAnalyzer(Map).
         *    - configuration: a JSON object to be passed to the buildAnalyzer method.
         */
        Option analyzer = OptionBuilder.withArgName("analyzer")
                        .hasArg()
                        .withDescription("specific analyzer")
                        .withLongOpt("analyzer")
                        .create("an");

        Option configFile = OptionBuilder.withArgName("conf-file")
                        .hasArg()
                        .withDescription("configuration file")
                        .withLongOpt("conf-file")
                        .create("cf");

        Option loadState = OptionBuilder.withArgName("load")
                                        .withDescription("if present, the index engine will try to restore its state"
                                                + "from the serialized form.")
                                        .withLongOpt("load-state")
                                        .create("l");

        Option suggest = OptionBuilder.withArgName("suggest")
                                        .hasArg()
                                        .withDescription("if present, loads the suggest/autocomplete system.")
                                        .withLongOpt("suggest")
                                        .create("su");
        Option facets = OptionBuilder.withArgName("facets")
                        .withDescription("if present, performs facetings queries.")
                        .withLongOpt("facets")
                        .create("fa");

        Option functions = OptionBuilder.withArgName("functions")
                                        .hasArg()
                                        .withDescription("list of '|' separated scoring functions, each of which has the form <id>:<definition>.")
                                        .withLongOpt("functions")
                                        .create("fn");
        Option didyoumean = OptionBuilder.withLongOpt("didyoumean")
                                        .withDescription("if present, performs 'did you mean?' suggestions on queries. Requires --suggest documents.")
                                        .create("dym");
       
        Option storage  = OptionBuilder.withLongOpt("storage")
                                        .hasArg()
                                        .withDescription("if present, specifies a storage backend. Options are 'bdb' and 'ram'. Defaults to 'ram'.")
                                        .create("st");

        Option bdbCache  = OptionBuilder.withLongOpt("bdb-cache")
                                        .hasArg()
                                        .withDescription("if present, specifies the size of the berkeleyDb cache per thread, in megabytes. Defaults to 100MB.")
                                        .create("bc");

        Options options = new Options();
        options.addOption(baseDir);
        options.addOption(basePort);
        options.addOption(boostSize);
        options.addOption(help);
        options.addOption(snippets);
        options.addOption(recover);
        options.addOption(indexCode);
        options.addOption(rtiSize);
        options.addOption(loadState);
        options.addOption(suggest);
        options.addOption(facets);
        options.addOption(functions);
        options.addOption(environment);
        options.addOption(analyzer);
        options.addOption(didyoumean);
        options.addOption(configFile);
        options.addOption(storage);
        options.addOption(bdbCache);

        return options;
    }

    private static void printHelp(Options options, String error) {
        if (null != error) {
            System.out.println("Parsing failed.  Reason: " + error);
        }
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("IndexEngine",options);
    }

    //--------------------------------------------------------------------------------
    //PRIVATE CLASSES
    private static class ShutdownThread extends Thread {
        private final BoostingIndexer server;
        public ShutdownThread(BoostingIndexer server) {
            this.server = server;
            setName("IndexEngine's ShutdownThread");
        }
        @Override
        public void run() {
            try {
                logger.info("Shutdown hook started.");
                server.dump();
                logger.info("Shutdown hook ended.");
            } catch (Exception e) {
                logger.error("Exception caught while saving state to disk. This probably means that some data was lost.", e);
            }
        }
    }

    //--------------------------------------------------------------------------------
    //STATIC METHODS
    public static void main(String[] args) throws IOException{
        String log4jConfigPath = com.flaptor.util.FileUtil.getFilePathFromClasspath("log4j.properties");
        if (null != log4jConfigPath) {
            org.apache.log4j.PropertyConfigurator.configureAndWatch(log4jConfigPath);
        } else {
            logger.warn("log4j.properties not found on classpath!");
        }
        // create the parser
        CommandLineParser parser = new PosixParser();
        try {
            // parse the command line arguments
            CommandLine line = parser.parse( getOptions(), args );
            if (line.hasOption("help")) {
                printHelp(getOptions(),null);
                System.exit(1);
            }

            File baseDir = new File(line.getOptionValue("dir"));
            int basePort = Integer.parseInt(line.getOptionValue("port", String.valueOf(DEFAULT_BASE_PORT)));
            int boostsSize = Integer.parseInt(line.getOptionValue("boosts", String.valueOf(1)));
            int rtiSize = Integer.parseInt(line.getOptionValue("rti-size", String.valueOf(DEFAULT_RTI_SIZE)));
            boolean loadState = line.hasOption("load-state");

            SuggestValues suggest;
            if (line.hasOption("suggest")) {
                String value = line.getOptionValue("suggest");
                if ( value.equalsIgnoreCase("queries")) {
                    suggest = SuggestValues.QUERIES;
                } else if ( value.equalsIgnoreCase("documents")) {
                    suggest = SuggestValues.DOCUMENTS;
                } else {
                    throw new IllegalArgumentException("Invalid value for suggest: can only be \"queries\" or \"documents\".");
                }
            } else {
                suggest = SuggestValues.NO;
            }
           
            StorageValues storageValue = StorageValues.RAM;
            int bdbCache = 0;
            if (line.hasOption("storage")){
                String storageType = line.getOptionValue("storage");
                if ("bdb".equals(storageType)) {
                    storageValue = StorageValues.BDB;
                    bdbCache = Integer.parseInt(line.getOptionValue("bdb-cache", String.valueOf(DEFAULT_BDB_CACHE)));
                } else if ("cassandra".equals(storageType)) {
                    storageValue = StorageValues.CASSANDRA;
                } else if ("ram".equals(storageType)) {
                    storageValue = StorageValues.RAM;
                } else {
                    throw new IllegalArgumentException("storage has to be 'cassandra', 'bdb' or 'ram'. '" + storageType + "' given.");
                }
            }

            String functions = null;
            if (line.hasOption("functions")) {
                functions = line.getOptionValue("functions");
            }
           
            String environment;
            String val = line.getOptionValue("environment-prefix", null);
            if (null != val) {
              environment = val;
            } else {
              environment = "";
            }
            logger.info("Command line option 'environment-prefix' set to " + environment);
     
            boolean facets = line.hasOption("facets");
            logger.info("Command line option 'facets' set to " + facets);
            String indexCode = line.getOptionValue("index-code");
            logger.info("Command line option 'index-code' set to " + indexCode);

            Map<Object, Object> configuration = Maps.newHashMap();
         
            String configFile = line.getOptionValue("conf-file", null);
          logger.info("Command line option 'conf-file' set to " + configFile);
           
          if (configFile != null) {
            configuration = (Map<Object, Object>) JSONValue.parse(FileUtil.readFile(new File(configFile)));
          }
            IndexEngine ie = new IndexEngine(
                                               baseDir,
                                               basePort,
                                               rtiSize,
                                               loadState,
                                               boostsSize,
                                               suggest,
                                               storageValue,
                                               bdbCache,
                                               functions,
                                               facets,
                                               indexCode,
                                               environment,
                                               configuration);

            BoostingIndexer indexer = ie.getIndexer();
            DocumentSearcher searcher = ie.getSearcher();
            Suggestor suggestor = ie.getSuggestor();
            DocumentStorage storage = ie.getStorage();


            if (line.hasOption("snippets")) {
                // shouldn't this be set based on storageValue?
              indexer = new DocumentStoringIndexer(indexer, storage);
              ie.setIndexer(indexer);
                searcher = new SnippetSearcher(searcher, storage, ie.getParser());
            }

            if (line.hasOption("didyoumean")) {
                if (suggest != SuggestValues.DOCUMENTS) {
                    throw new IllegalArgumentException("didyoumean requires --suggest documents");
                }
                DidYouMeanSuggestor dym = new DidYouMeanSuggestor((TermSuggestor)ie.getSuggestor());
                searcher = new DidYouMeanSearcher(searcher, dym);
            }

            int maxSearchQueueLength = DEFAULT_MAX_SEARCH_QUEUE_LENGTH;
            if (configuration.containsKey("max_search_queue")) {
                maxSearchQueueLength = ((Long) configuration.get("max_search_queue")).intValue();
                logger.info("Using max_search_queue length: " + maxSearchQueueLength);
            }

            searcher = new TrafficLimitingSearcher(searcher, maxSearchQueueLength);
            Runtime.getRuntime().addShutdownHook(new ShutdownThread(indexer));

            new SearcherServer(searcher, ie.getParser(), ie.boostsManager, ie.scorer, basePort + 2).start();
      new SuggestorServer(suggestor, basePort + 3).start();
            IndexerServer indexerServer = new IndexerServer(ie, indexer, basePort + 1);
            indexerServer.start();

        } catch( ParseException exp ) {
            printHelp(getOptions(),exp.getMessage());
        }

    }

}
// TOP
//
// Related Classes of com.flaptor.indextank.index.IndexEngine
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.