Package proj.zoie.impl.indexing.luceneNRT

Source Code of proj.zoie.impl.indexing.luceneNRT.ThrottledLuceneNRTDataConsumer$ReopenThread

package proj.zoie.impl.indexing.luceneNRT;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.concurrent.ConcurrentLinkedQueue;

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import proj.zoie.api.ZoieVersion;
import proj.zoie.api.DataConsumer;
import proj.zoie.api.ZoieVersion;
import proj.zoie.api.IndexReaderFactory;
import proj.zoie.api.ZoieException;
import proj.zoie.api.indexing.ZoieIndexable;
import proj.zoie.api.indexing.ZoieIndexableInterpreter;
import proj.zoie.api.indexing.ZoieIndexable.IndexingReq;

public class ThrottledLuceneNRTDataConsumer<D, V extends ZoieVersion> implements DataConsumer<D,V>,IndexReaderFactory<IndexReader>{
  private static final Logger logger = Logger.getLogger(ThrottledLuceneNRTDataConsumer.class);

  private static int MAX_READER_GENERATION = 3;
  /**
   * document ID field name
  */
  public static final String DOCUMENT_ID_FIELD = "id";
   
 
  private IndexWriter _writer;
  private Analyzer _analyzer;
  private ZoieIndexableInterpreter<D> _interpreter;
  private Directory _dir;
  private final long _throttleFactor;
  private IndexReader _currentReader;
  private ReopenThread _reopenThread;
  private HashSet<IndexReader> _returnSet = new HashSet<IndexReader>();
  private ConcurrentLinkedQueue<IndexReader> _returnList = new ConcurrentLinkedQueue<IndexReader>();
 
  public ThrottledLuceneNRTDataConsumer(File dir,ZoieIndexableInterpreter<D> interpreter,long throttleFactor) throws IOException{
    this(FSDirectory.open(dir),new StandardAnalyzer(Version.LUCENE_CURRENT),interpreter,throttleFactor);
  }
 
  public ThrottledLuceneNRTDataConsumer(File dir,Analyzer analyzer,ZoieIndexableInterpreter<D> interpreter,long throttleFactor) throws IOException{
    this(FSDirectory.open(dir),analyzer,interpreter,throttleFactor);
  }
 
  public ThrottledLuceneNRTDataConsumer(Directory dir,Analyzer analyzer,ZoieIndexableInterpreter<D> interpreter,long throttleFactor){
    _writer = null;
    _analyzer = analyzer;
    _interpreter = interpreter;
    _dir = dir;
    _throttleFactor = throttleFactor;
    _currentReader = null;
    if (_throttleFactor<=0) throw new IllegalArgumentException("throttle factor must be > 0");
    _reopenThread = new ReopenThread();
  }
 
  public void start(){
    try {
      _writer = new IndexWriter(_dir, _analyzer,MaxFieldLength.UNLIMITED);
      _reopenThread.start();
    } catch (IOException e) {
      logger.error("uanble to start consumer: "+e.getMessage(),e);
    }
  }
 
  public void shutdown(){
    _reopenThread.terminate();
    if (_currentReader!=null){
      try {
        _currentReader.close();
      } catch (IOException e) {
        logger.error(e.getMessage(),e);
      }
    }
    if (_writer!=null){
      try {
        _writer.close();
      } catch (IOException e) {
        logger.error(e.getMessage(),e);
      }
    }
  }
 
  public void consume(Collection<proj.zoie.api.DataConsumer.DataEvent<D,V>> events)
      throws ZoieException {
    if (_writer == null){
      throw new ZoieException("Internal IndexWriter null, perhaps not started?");
    }
   
    if (events.size() > 0){
      for (DataEvent<D,V> event : events){
        ZoieIndexable indexable = _interpreter.convertAndInterpret(event.getData());
        if (indexable.isSkip()) continue;
       
        try {
          _writer.deleteDocuments(new Term(DOCUMENT_ID_FIELD,String.valueOf(indexable.getUID())));
        } catch(IOException e) {
          throw new ZoieException(e.getMessage(),e);
        }
         
        IndexingReq[] reqs = indexable.buildIndexingReqs();
        for (IndexingReq req : reqs){
        Analyzer localAnalyzer = req.getAnalyzer();
        Document doc = req.getDocument();
        Field uidField = new Field(DOCUMENT_ID_FIELD,String.valueOf(indexable.getUID()),Store.NO,Index.NOT_ANALYZED_NO_NORMS);
        uidField.setOmitTermFreqAndPositions(true);
        doc.add(uidField);
        if (localAnalyzer == null) localAnalyzer = _analyzer;
        try {
          _writer.addDocument(doc, localAnalyzer);
        } catch(IOException e) {
          throw new ZoieException(e.getMessage(),e);
        }
        }
      }
     
     
      int numdocs;
      try {
        // for realtime commit is not needed per lucene mailing list
        //_writer.commit();
        numdocs = _writer.numDocs();
      } catch (IOException e) {
        throw new ZoieException(e.getMessage(),e);
      }
     
      logger.info("flushed "+events.size()+" events to index, index now contains "+numdocs+" docs.");
    }
  }

  public Analyzer getAnalyzer() {
    return _analyzer;
  }

  public IndexReader getDiskIndexReader() throws IOException {
    return _currentReader;
  }

  public List<IndexReader> getIndexReaders() throws IOException {
    IndexReader subReader = getDiskIndexReader();
    ArrayList<IndexReader> list = new ArrayList<IndexReader>();
    if (subReader!=null){
      list.add(subReader);
    }
    return list;
  }

  public void returnIndexReaders(List<IndexReader> readers) {
    if (readers!=null){
      for (IndexReader r : readers){
        if (r != _currentReader){
          returnReader(r);
        }
      }
    }
  }
 
  private void returnReader(IndexReader reader){
    synchronized(_returnSet){
      if (!_returnSet.contains(reader)){
        _returnSet.add(reader);
        _returnList.add(reader);
      }
      while (_returnList.size()>=MAX_READER_GENERATION){
        logger.info("remove and close old reader: "+_returnList.size()+"/"+_returnSet.size());
        IndexReader r = _returnList.remove();
        _returnSet.remove(r);
        try {
          r.close();
        } catch (IOException e) {
          logger.error(e.getMessage(),e);
        }
      }
    }
  }
 
  private class ReopenThread extends Thread{
    private boolean _stop;
    ReopenThread(){
      super("reopen thread");
      setDaemon(true);
      _stop=false;
    }
   
    void terminate(){
      if (!_stop){
        _stop=false;
        interrupt();
      }
    }
   
    public void run(){
      while(!_stop){
        try {
          Thread.sleep(ThrottledLuceneNRTDataConsumer.this._throttleFactor);
        } catch (InterruptedException e) {
          continue;
        }
        if (ThrottledLuceneNRTDataConsumer.this._writer!=null){
          try {
            logger.info("updating reader...");
            IndexReader oldReader = ThrottledLuceneNRTDataConsumer.this._currentReader;
            ThrottledLuceneNRTDataConsumer.this._currentReader=ThrottledLuceneNRTDataConsumer.this._writer.getReader();
            if (oldReader!=null){
              returnReader(oldReader);
            }
          } catch (IOException e) {
            logger.error(e.getMessage(),e);
          }
        }
      }
    }
  }
 
  public V getVersion()
  {
    throw new UnsupportedOperationException();
  }
}
TOP

Related Classes of proj.zoie.impl.indexing.luceneNRT.ThrottledLuceneNRTDataConsumer$ReopenThread

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.