Package com.flaptor.indextank.index.lsi

Source Code of com.flaptor.indextank.index.lsi.LsiIndex

/*
* Copyright (c) 2011 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package com.flaptor.indextank.index.lsi;

import java.io.File;
import java.io.IOException;
import java.util.Map;
import java.util.concurrent.BlockingDeque;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.MMapDirectory;

import com.flaptor.indextank.index.QueryMatcher;
import com.flaptor.indextank.index.lsi.term.IndexReaderTermMatcher;
import com.flaptor.indextank.index.scorer.FacetingManager;
import com.flaptor.indextank.index.scorer.Scorer;
import com.flaptor.indextank.index.term.TermMatcher;
import com.flaptor.indextank.index.term.query.TermBasedQueryMatcher;
import com.flaptor.indextank.query.IndexEngineParser;
import com.flaptor.util.Execute;
import com.flaptor.util.Pair;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;


public class LsiIndex {
    private static final Logger logger = Logger.getLogger(Execute.whoAmI());

    public static final int SEARCHER_POOL_SIZE = 2;
    public static final String PAYLOAD_TERM_FIELD = "docidpayload";
    public static final String PAYLOAD_TERM_TEXT = "docidpayload";
    public static final Term PAYLOAD_TERM = new Term(PAYLOAD_TERM_FIELD, PAYLOAD_TERM_TEXT);
   
    private final File dirLocation;
    private /*final*/ Directory directory;
    private final Scorer scorer;
    private final AtomicReference<Pair<BlockingDeque<IndexSearcher>, BlockingDeque<QueryMatcher>>> searchObjects;
    private volatile IndexWriter indexWriter;

    private final FacetingManager facetingManager;
  private final IndexEngineParser parser;
 
  private final Map<String, String> stats = new ConcurrentHashMap<String, String>();


    public LsiIndex(IndexEngineParser parser, String directoryPath, Scorer scorer, FacetingManager facetingManager) throws IOException {
        this.parser = parser;
    this.scorer = scorer;
    this.facetingManager = facetingManager;
    Preconditions.checkNotNull(directoryPath);
        dirLocation = new File(directoryPath);
        if (!dirLocation.exists() || !dirLocation.isDirectory()) {
            throw new IllegalArgumentException("Wrong directory path.");
        }
        directory = new MMapDirectory(dirLocation);
        reopenWriter();
        searchObjects = new AtomicReference<Pair<BlockingDeque<IndexSearcher>, BlockingDeque<QueryMatcher>>>();
        reopenSearcher();
    }

    private void reopenSearcher() {
        BlockingDeque<IndexSearcher> searcherPool= new LinkedBlockingDeque<IndexSearcher>();
        BlockingDeque<QueryMatcher> matcherPool = new LinkedBlockingDeque<QueryMatcher>();
        for (int i=0; i < SEARCHER_POOL_SIZE; i++) {
            try {
                IndexSearcher searcher = new IndexSearcher(directory, true); //read-only for better concurrent performance.
                TermMatcher termMatcher = new IndexReaderTermMatcher(searcher.getIndexReader(), PAYLOAD_TERM);
                QueryMatcher matcher = new TermBasedQueryMatcher(scorer, termMatcher, this.facetingManager);   
                searcherPool.addFirst(searcher); //no blocking, throws exception.
                matcherPool.addFirst(matcher);
            } catch (CorruptIndexException cie) {
                logger.fatal("HORROR!!! corrupted index. unable to reopen", cie);
            } catch (IOException ioe) {
                logger.fatal("HORROR!!! IO exception. unable to reopen", ioe);
            }
        }
        searchObjects.set(new Pair<BlockingDeque<IndexSearcher>, BlockingDeque<QueryMatcher>>(searcherPool, matcherPool));
    }

    private void reopenWriter() throws CorruptIndexException, LockObtainFailedException, IOException {
        indexWriter = new IndexWriter(this.directory, getAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    }

  private Analyzer getAnalyzer() {
    Analyzer analyzer = parser.getAnalyzer();
    Analyzer payloadAnalyzer = new PayloadAnalyzer();
    return new PerFieldAnalyzerWrapper(analyzer, ImmutableMap.of(LsiIndex.PAYLOAD_TERM_FIELD, payloadAnalyzer));
  }

    public BlockingDeque<IndexSearcher> getLuceneIndexSearcherPool() {
        return searchObjects.get().first();
    }
   
    public BlockingDeque<QueryMatcher> getQueryMatcherPool() {
        return searchObjects.get().last();
    }

  public IndexWriter getLuceneIndexWriter() {
        return indexWriter;
    }

    public void flush(){
        try {
            long t = System.currentTimeMillis();
            int before = indexWriter.maxDoc();
            indexWriter.commit();
            int after = indexWriter.maxDoc();
            int total = indexWriter.numDocs();
           
            double commitTime = (System.currentTimeMillis() - t) / 1000.0;
            stats.put("lucene_doc_count", String.valueOf(total));
            stats.put("commit_time", String.valueOf(commitTime));
            stats.put("lucene_max_doc", String.valueOf(after));
            stats.put("lucene_previous_max_doc", String.valueOf(before));
           
            logger.info(String.format("Commited index to disk in %.3fs. Document count is %d. MaxDoc from %d to %d", commitTime, total, before, after));
        } catch (IOException e) {
            logger.fatal("unexpected exception while commiting the index: ", e);
            System.exit(1);
        }
        reopenSearcher();
    }
   
    public Map<String, String> getStats() {
        return stats;
    }

}
TOP

Related Classes of com.flaptor.indextank.index.lsi.LsiIndex

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.