Package fr.eolya.indexer

Source Code of fr.eolya.indexer.Indexer

package fr.eolya.indexer;

import java.io.*;
import java.util.*;

import org.apache.commons.io.FileUtils;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

import fr.eolya.utils.Logger;
import fr.eolya.utils.Utils;
import fr.eolya.utils.XMLConfig;
import gnu.getopt.Getopt;

public class Indexer {
   
  protected Logger logger;
    protected boolean stopRequested;
   
    protected Hashtable<String,IEngine> engines = null;
   
    protected int commitEach = 0;
    protected int commitWithin = 0;
    protected int optimizeEach = 0;

    private String requestHandler = null;
    private boolean solrUseJavaBin = false;
   
    protected String settingsDirectoryPath = null;
    protected XMLConfig config;
   
    private static void usage() {
        System.out.println("Usage : java Indexer -p <properties file> [-o] [-v] [-r] [-c] [-s url] [-f]");
        System.out.println("    -o : once");
        System.out.println("    -v : verbose");
        System.out.println("    -r : reset index");
        System.out.println("    -c : optimize index only");
        System.out.println("    -s : solr core url");
        System.out.println("    -f : force url");
    }
   
    public static void main(String[] args) {
        if (args.length == 0) {
            usage();
            System.exit(-1);
        }
       
        Getopt g = new Getopt("Indexer", args, "p:orcvus:f");
        g.setOpterr(false);
        int c;
       
        boolean once = false;
        boolean optimizeOnly = false;
        boolean verbose = false;
        boolean resetindex = false;
        String propFileName = "";
        String solrCoreUrl = "";
        boolean forceUrl = false;
       
        while ((c = g.getopt()) != -1) {
            switch (c) {
                case 'p':
                    propFileName = g.getOptarg();
                    break;
                   
                case 'o':
                    once = true;
                    break;
                   
                case 'r':
                    resetindex = true;
                    break;
                   
                case 'c':
                    optimizeOnly = true;
                    break;
                   
                case 'v':
                    verbose = true;
                    break;
                   
                case 's':
                    solrCoreUrl = g.getOptarg();
                    break;
                                       
                case 'f':
                    forceUrl = true;
                    break;
            }
        }
       
        if ("".equals(propFileName)) {
            System.out.println("Error: no properties file specified");
            System.exit(-1);
        }
        propFileName = Utils.getValidPropertyPath(propFileName, null, "HOME");
        String settingsDirectoryPath = new File(propFileName).getParentFile().getAbsolutePath();
       
        System.out.println("Config file = " + propFileName);
       
        XMLConfig config = new XMLConfig();
        try {
            File configFile =new File(propFileName);
            if (!configFile.exists()) {
                System.out.println("Error configuration file not found [" + propFileName + "]");
                System.exit(-1);
            }
            config.loadFile(propFileName);
        }
        catch(IOException e) {
            System.out.println("Error while reading properties file");
            e.printStackTrace();
            System.exit(-1);     
        }
       
        String witnessFilesPath = config.getProperty("/indexer/param[@name='witnessfilespath']");
        witnessFilesPath = Utils.getValidPropertyPath(witnessFilesPath, null, "HOME");
        if (witnessFilesPath == null || "".equals(witnessFilesPath)) {
            System.out.println("Error : missing witness_files_path propertie");
            System.exit(-1);
        }
       
        String witnessFilesName = config.getProperty("/indexer/param[@name='witnessfilesname']", "indexer");
       
        File filePid = new File(witnessFilesPath + "/" + witnessFilesName + ".pid");
        if (filePid.exists()) {
            System.out.println("A indexer instance is already running");
            System.exit(-1);
        }
       
        try {
            BufferedWriter out = new BufferedWriter(new FileWriter(filePid.getAbsolutePath()));
            out.write(Utils.getProcessId());
            out.close();
        } catch (Exception e) {
            System.out.println("Error while creating file : " + filePid.getAbsolutePath());
            e.printStackTrace();
            System.exit(-1);
        }
       
        try {
            Indexer indexer = new Indexer(config, settingsDirectoryPath);
            indexer.run(once, verbose, resetindex, optimizeOnly, solrCoreUrl, forceUrl);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (filePid.exists()) {
                filePid.delete();
            }
        }
    }
   
    public Indexer(XMLConfig config, String settingsDirectoryPath) {
        this.config = config;
        this.settingsDirectoryPath = settingsDirectoryPath;
    }
   
    public void run(boolean once, boolean verbose, boolean resetindex, boolean optimizeOnly, String solrCoreUrl, boolean forceUrl) {
       
        String logFilesPath = config.getProperty("/indexer/param[@name='logfilename']");
        logFilesPath = Utils.getValidPropertyPath(logFilesPath, null, "HOME");
        logger = new Logger(logFilesPath);
       
        String witnessFilesPath = config.getProperty("/indexer/param[@name='witnessfilespath']");
        witnessFilesPath = Utils.getValidPropertyPath(witnessFilesPath, null, "HOME");
       
        String witnessFilesName = config.getProperty("/indexer/param[@name='witnessfilesname']", "indexer");
       
        File filePid = new File(witnessFilesPath + "/" + witnessFilesName + ".pid");
        File fileStop = new File(witnessFilesPath + "/" + witnessFilesName + ".stop");
       
        String filesQueue = config.getProperty("/indexer/param[@name='queuepath']");
        filesQueue = Utils.getValidPropertyPath(filesQueue, null, "HOME");
       
        if (filesQueue==null && "".equals(filesQueue)) { // for backward compatibility
            filesQueue = config.getProperty("/indexer/param[@name='queupath']");
            filesQueue = Utils.getValidPropertyPath(filesQueue, null, "HOME");
        }
       
        String filesPattern = config.getProperty("/indexer/param[@name='queuefilepattern']");
       
        commitWithin = Integer.parseInt(config.getProperty("/indexer/solr/param[@name='commitwithin']", "0").trim());
        commitEach = Integer.parseInt(config.getProperty("/indexer/solr/param[@name='commiteach']", "0").trim());
        optimizeEach = Integer.parseInt(config.getProperty("/indexer/solr/param[@name='optimizeeach']", "0").trim());
        requestHandler = config.getProperty("/indexer/solr/param[@name='requesthandler']");
        solrUseJavaBin = ("javabin".equals(config.getProperty("/indexer/solr/param[@name='jababin']", "")));
       
        int stopAfterMaxSuccessiveError = Integer.parseInt(config.getProperty("/indexer/param[@name='stopaftermaxsuccessivesolrerror']", "3").trim());
       
        logger.log("=================================");
        logger.log("Indexer starting");
        if (once)
            logger.log("    mode once");
        if (verbose)
            logger.log("    mode verbose");
        if (optimizeOnly)
            logger.log("    mode optimize only");
        logger.log("=================================");
        logger.log("");
       
        // initiate Solr
        engines = new Hashtable<String,IEngine> ();
       
        String solrUrl = "";
        String solrCoreName = null;
        if (solrCoreUrl!=null && !"".equals(solrCoreUrl)) {
            solrUrl = solrCoreUrl;
        } else {
            solrUrl = config.getProperty("/indexer/solr/param[@name='baseurl']");
            if (solrUrl!=null && !"".equals(solrUrl)) {
                solrCoreName = config.getProperty("/indexer/solr/param[@name='corename']");
                //if (solrCoreName != null && !"".equals(solrCoreName))
                //    solrUrl = solrUrl + solrCoreName;
            } else {
              String solrHome = config.getProperty("/indexer/solr/param[@name='home']");
              if (solrHome!=null && !"".equals(solrHome)) {
                solrUrl = solrHome;
                  solrCoreName = config.getProperty("/indexer/solr/param[@name='corename']");
                  //if (solrCoreName != null && !"".equals(solrCoreName))
                  //    solrUrl = solrUrl + solrCoreName;
              }
            }
        }
       
        if (solrUrl!=null && !"".equals(solrUrl)) {
            SolrCore solrCore = createSolrCore(solrUrl, solrCoreName, commitWithin, commitEach, optimizeEach, logger, verbose, requestHandler, solrUseJavaBin);
            if (solrCore!=null) {
                //return;
               
                if (optimizeOnly) {
                    solrCore.optimize(true);
                }
               
                if (resetindex)
                    if (!solrCore.resetIndex())
                        return;
               
                //solrCores.put(solrUrl, solrCore);
                engines.put(solrUrl, solrCore);
            }
        }
              
        if (optimizeOnly) {
            filePid.delete();
            logger.log("Indexer ending");
            return;
        }
       
        stopRequested = false;
        boolean firstNoMoreFile = true;
        try {
            while (!stopRequested) {
                stopRequested = fileStop.exists() || !filePid.exists();
                if (stopRequested) {
                    break;
                }
               
                int fileCount = 0;
               
                File f = new File(filesQueue);
                if (f.isDirectory()) {
                    File[] fl = Utils.getListFileAlphaOrder(f);
                    if (fl.length>0) {
                       
                        java.util.Date dstartloop = new java.util.Date();
                        int successiveErrorCount = 0;
                       
                        for (int i = 0; i < Math.min(fl.length, 1000); i++) {
                            stopRequested = fileStop.exists() || !filePid.exists();
                            if (stopRequested) {
                                break;
                            }
                            if (fl[i].isFile() && fl[i].getName().matches(filesPattern)) {
                                logger.log("processing : " + filesQueue + "/" + fl[i].getName());
                                System.out.println("processing : " + filesQueue + "/" + fl[i].getName());
                                int ret = processOneFile(filesQueue + "/" + fl[i].getName(), solrUrl, forceUrl, verbose);
                                if (ret == -2) {
                                    // Not pinging -> file will be retry later.
                                    logger.log("Solr not pinging. Current file processing will be retry later in a few seconds.");
                                    // not pinging -> pause
                                    Utils.sleep(15000);
                               
                                else {
                                    boolean fileSuccess = true;
                                    if (ret == -1) {
                                        // File error -> remove from queue
                                        logger.log("File error. File removed from queue !");
                                        fileSuccess = false;
                                        successiveErrorCount++;
                                    }
                                    else {
                                        successiveErrorCount=0;
                                        if (ret == 0) fileSuccess = false;
                                    }
                                   
                                    fileDone (fl[i], fileSuccess, config);
                                    fileCount++;
                                   
                                    if (successiveErrorCount>=stopAfterMaxSuccessiveError) {
                                        // Erreur fatale (solr ???)
                                        filePid.delete();
                                        logger.log("Maximum successive Solr fatal error count reached - stop process !");
                                        return;
                                    }
                                }
                            }
                        }
                       
                        java.util.Date dendloop = new java.util.Date();
                        long timeloop = (dendloop.getTime() - dstartloop.getTime());
                       
                        if (timeloop>0) {
                            String msg = "Loop : \n";
                            msg += "    time (sec)                  = " + String.valueOf(timeloop / 1000) + "\n";
                            msg += "    doc                         = " + String.valueOf(Math.min(fl.length, 1000)) + "\n";
                            msg += "    time per doc (ms)           = " + String.valueOf(timeloop / Math.min(fl.length, 1000)) + "\n";
                            msg += "    docs per minute             = " + String.valueOf((Math.min(fl.length, 1000) * 60 * 1000) / timeloop) + "\n";
                            msg += "    memory (free / max / total) = " + String.valueOf(Runtime.getRuntime().freeMemory()) + " / " + String.valueOf(Runtime.getRuntime().maxMemory()) + " / " + String.valueOf(Runtime.getRuntime().totalMemory());
                            logger.log (msg);
                        }
                    }     
                    else
                        if (firstNoMoreFile) {
                            enginesCommit(true);
                            firstNoMoreFile = false;
                        }
                    }
                }
               
                if (fileCount == 0 && once) {
                    enginesCommit(true);
                    boolean queueEmpty = true;
                    logger.log("    No more file to index : start waiting 5 minutes");
                    for (int j=0; j<100 && !stopRequested && queueEmpty; j++) {
                        stopRequested = fileStop.exists() || !filePid.exists();
                        Utils.sleep(3000);
                        if (queueLength(filesQueue, filesPattern) > 0) queueEmpty = false;
                    }
                    if (queueLength(filesQueue, filesPattern) == 0) {
                        if (!stopRequested) logger.log("    No more file to index after waiting 5 minutes and mode once : stop indexing");
                        break;
                    }
                }
                else {
                    enginesCommit(false);
                    firstNoMoreFile = true;
                    Utils.sleep(2000);
                }       
               
            }
            enginesTerminate(false);
        } catch (Exception e) {
            System.err.println("An error occured: ");
            e.printStackTrace();
        }
       
        filePid.delete();
        logger.log("Fin du traitement");
        return;
    }
   
    private void fileDone(File f, boolean success, XMLConfig config) {
        String tgtPath;
        if (success) {
            tgtPath = config.getProperty("/indexer/param[@name='onsuccessmoveto']");
        } else {
            tgtPath = config.getProperty("/indexer/param[@name='onerrormoveto']");
        }
        tgtPath = Utils.getValidPropertyPath(tgtPath, null, "HOME");
       
        if (tgtPath!=null && !"".equals(tgtPath)) {
            File tgtDir = new File(tgtPath);
            tgtDir.mkdirs();
            File f2 = new File(tgtPath + "/" + f.getName());
            if (f2.exists()) f2.delete();
            try {
                FileUtils.moveFile(f, f2);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if (f.exists())
            f.delete()
    }
   
    private int processOneFile(String fileName, String solrUrl, boolean forceUrl, boolean verbose) {
       
        boolean allOk = true;
        IEngine engine = null;
       
        try {
            SAXReader reader = new SAXReader();
            reader.setValidation(false);
            Document document = null;
            try {
                document = reader.read(new File(fileName));
            } catch (Exception e) {
                logger.log("     error (reader.read) : " + e.getMessage());
                if (verbose) logger.logStackTrace(e, true);
                return 0;
            }
           
            @SuppressWarnings("unchecked")
            List<Element> jobs = document.selectNodes("//doc");
            Iterator<Element> iterJobs = jobs.iterator();
            while (iterJobs.hasNext()) {
               
                Collection<InputDocument> docs = new ArrayList<InputDocument>();
               
                Element job = (Element) iterJobs.next();
               
                String targetUrl = solrUrl;
               
                // Specific solr core url ?
                String docTargetUrl = job.attributeValue("target_url");               
                if (docTargetUrl!=null && !"".equals(docTargetUrl) && !(!"".equals(targetUrl) && forceUrl)) {
                    engine = engines.get(docTargetUrl);
                    if (engine==null) {
                        engine = createEngine(docTargetUrl, null, commitWithin, commitEach, optimizeEach, logger, verbose, requestHandler, solrUseJavaBin);
                        if (engine!=null)
                            engines.put(docTargetUrl, engine);
                    }
                }
                else {
                    if (targetUrl!=null && !"".equals(targetUrl))
                        engine = engines.get(targetUrl);
                }
               
                if (engine!=null) {
                   
                    logger.log("    Solr url              = " + engine.getUrl());
                    // action ?
                    String action = "";
                    String value = job.attributeValue("action");
                    if (value != null) action = value;
                   
                    if ("resetsource".equals(action) || "remove".equals(action)) {
                        String query = "";
                       
                        @SuppressWarnings("unchecked")
            List<Element> childs = job.elements();
                        Iterator<Element> iterChilds = childs.iterator();
                        while (iterChilds.hasNext()) {
                            Element field = (Element) iterChilds.next();
                           
                            String fieldName = field.getName();
                            String fieldValue = field.getText();
                           
                            if (fieldValue != null && !"".equals(fieldValue)) {
                                if (!"".equals(query)) query += " AND ";
                                query += fieldName + ":\"" + fieldValue + "\"";
                            }
                        }
                        if (!"".equals(query)) {
                            if ("resetsource".equals(action)) {
                                logger.log("     reset source (" + query+ ")");
                            } else {
                                logger.log("     remove item (" + query+ ")");
                            }
                            engine.deleteByQuery(query);
                            if ("resetsource".equals(action) && !engine.commit(true)) {
                                logger.log("     error during commit");
                                if (!engine.ping()) {
                                    logger.log("     not pinging");
                                    return -2;
                                }
                                return -1;               
                            }
                        }
                        else {
                            logger.log("     reset source error (no query)");
                        }
                    }
                   
                    if (action==null || "".equals(action) || "add".equals(action)) {
                        InputDocument doc = new InputDocument();
                        String boostDoc = job.attributeValue("boost");
                        if (boostDoc!=null && !"".equals(boostDoc)) {
                            doc.setDocumentBoost(boostDoc);
                        }
                       
                        @SuppressWarnings("unchecked")
            List<Element> childs = job.elements();
                        Iterator<Element> iterChilds = childs.iterator();
                        while (iterChilds.hasNext()) {
                            Element field = (Element) iterChilds.next();
                           
                            String fieldName = field.getName();
                            String fieldValue = field.getText();
                           
                            if (fieldValue != null && !"".equals(fieldValue)) {
                               
                                String boostField = field.attributeValue("boost");
                               
                                // Date ?
                                if (fieldValue.matches("[0-9]{4}\\-[0-9]{2}\\-[0-9]{2}\\s[0-9]{2}:[0-9]{2}:[0-9]{2}.*"))
                                    fieldValue = fieldValue.trim().replace(' ', 'T') + "Z";   
                               
                                if (boostField!=null && !"".equals(boostField)) {
                                    doc.addField(fieldName, fieldValue, boostField);
                                } else {
                                    doc.addField(fieldName, fieldValue);                                   
                                }
                                if (verbose)
                                    logger.log("    " + fieldName + "                                      ".substring(fieldName.length()) + " = " + fieldValue);
                            }
                        }
                        docs.add(doc);
                    }
                   
                    if (docs.size() > 0) {
                        try {
                            if (engine.addDocuments(docs) == -1) {
                                if (!engine.ping())
                                    return -2;
                                return -1;
                            }
                        } catch (Exception e) {             
                            logger.log("     error add : " + e.getMessage());
                            if (verbose) logger.logStackTrace(e, true);
                            // Ping ??
                            //if (!docSolrCore.ping())
                            if (!engine.ping())
                                return -2;
                            else {
                                System.err.println("An error occured: ");
                                e.printStackTrace();
                                return 0;
                            }
                        }
                    }
                }
                else {
                    allOk = false;
                }
            }
        } catch (Exception e) {
            logger.log("     error : " + e.getMessage());
            if (verbose) logger.logStackTrace(e, false);
            System.err.println("An error occured: ");
            e.printStackTrace();
            return -1;
        }
        if (allOk)
            return 1;
        else
            return 0;
    }
   
    private int queueLength(String path, String filesPattern) {
        File f = new File(path);
        File[] fl = f.listFiles();
        int fileCount = fl.length;
        int ret = 0;
        for (int i=0; i<fileCount; i++)
            if (fl[i].isFile() && fl[i].getName().matches(filesPattern)) ret++;   
        return ret;
    }
   
   
    private IEngine createEngine(String docTargetUrl, String corename, int commitWithin, int commitEach, int optimizeEach, Logger logger, boolean verbose, String requestHandler, boolean solrUseJavaBin) {
        createSolrCore(docTargetUrl, corename, commitWithin, commitEach, optimizeEach, logger, verbose, requestHandler, solrUseJavaBin);
        return null;
    }
   
    private SolrCore createSolrCore(String solrUrl, String corename, int commitWithin, int commitEach, int optimizeEach, Logger logger, boolean verbose, String requestHandler, boolean solrUseJavaBin) {
        logger.log("Create new Solr core connection : " + solrUrl);
        SolrCore solrCore = new SolrCore(solrUrl, corename, commitWithin, commitEach, optimizeEach, logger, requestHandler, solrUseJavaBin);       
        solrCore.setOutputStackTrace(verbose);
       
        if (!solrCore.connect())
            return null;
       
        if (!solrCore.ping())
            return null;
       
        return solrCore;
    }
   
    private void enginesCommit(boolean force) {
        Iterator<String> itr = engines.keySet().iterator();
        while (itr.hasNext()) {
            String url = itr.next();
            engines.get(url).commit(force);
        }
    }
   
    private void enginesTerminate(boolean optimize) {
        Iterator<String> itr = engines.keySet().iterator();
        while (itr.hasNext()) {
            String url = itr.next();
            engines.get(url).terminate(optimize);
        }
    }
}
TOP

Related Classes of fr.eolya.indexer.Indexer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.