Package proj.zoie.impl.indexing.internal

Source Code of proj.zoie.impl.indexing.internal.RealtimeIndexDataLoader

package proj.zoie.impl.indexing.internal;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.Queue;

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Similarity;

import proj.zoie.api.DataConsumer;
import proj.zoie.api.ZoieVersion;
import proj.zoie.api.ZoieException;
import proj.zoie.api.ZoieHealth;
import proj.zoie.api.indexing.IndexingEventListener;
import proj.zoie.api.indexing.ZoieIndexable;
import proj.zoie.api.indexing.ZoieIndexableInterpreter;
import proj.zoie.impl.indexing.IndexUpdatedEvent;

/**
* Keeps track of the number of incoming data events.
*
* @author ymatsuda, xgu
*
*/
public class RealtimeIndexDataLoader<R extends IndexReader, D, V extends ZoieVersion> extends BatchedIndexDataLoader<R,D,V>
{
  private int _currentBatchSize;
  private final DataConsumer<ZoieIndexable,V>  _ramConsumer;
  private final DiskLuceneIndexDataLoader<R,V> _luceneDataLoader;
  private final Analyzer                     _analyzer;
  private final Similarity                   _similarity;
 
  private static Logger log = Logger.getLogger(RealtimeIndexDataLoader.class);
 
  public RealtimeIndexDataLoader(DiskLuceneIndexDataLoader<R,V> dataLoader, int batchSize,int maxBatchSize,long delay,
                                 Analyzer analyzer,
                                 Similarity similarity,
                                 SearchIndexManager<R,V> idxMgr,
                                 ZoieIndexableInterpreter<D> interpreter,
                                 Queue<IndexingEventListener<V>> lsnrList)
  {
    super((DataConsumer<ZoieIndexable,V>)dataLoader, batchSize, maxBatchSize, delay, idxMgr, interpreter, lsnrList);
    _analyzer = analyzer;
    _similarity = similarity;
    _currentBatchSize = 0;
    _ramConsumer = new RAMLuceneIndexDataLoader<R,V>(_analyzer, _similarity, _idxMgr);
    _luceneDataLoader = dataLoader;
  }
 
  /* (non-Javadoc)
   * @see proj.zoie.impl.indexing.internal.BatchedIndexDataLoader#consume(java.util.Collection)
   */
  @Override
  public void consume(Collection<DataEvent<D,V>> events) throws ZoieException
  {
    if (events != null)
    {
      ArrayList<DataEvent<ZoieIndexable,V>> indexableList =
          new ArrayList<DataEvent<ZoieIndexable,V>>(events.size());
      Iterator<DataEvent<D,V>> iter = events.iterator();
      while (iter.hasNext())
      {
        try
        {
          DataEvent<D,V> event = iter.next();
          ZoieIndexable indexable =
                ((ZoieIndexableInterpreter<D>) _interpreter).convertAndInterpret(event.getData());
         
          DataEvent<ZoieIndexable,V> newEvent =
              new DataEvent<ZoieIndexable,V>(indexable,event.getVersion());
          indexableList.add(newEvent);
        }
        catch (Exception e)
        {
          ZoieHealth.setFatal();
          log.error(e.getMessage(), e);
        }
      }

      synchronized (this) // this blocks the batch disk loader thread while indexing to RAM
      {
        int size = indexableList.size();
        _ramConsumer.consume(indexableList);// consumer clear the list!
        _currentBatchSize += size;
        _eventCount += size;
       
        while (_currentBatchSize > _maxBatchSize)
        {
          // check if load manager thread is alive
          if(_loadMgrThread == null || !_loadMgrThread.isAlive())
          {
            ZoieHealth.setFatal();
            throw new ZoieException("load manager has stopped");
          }
         
          this.notifyAll(); // wake up load manager thread     
         
          try
          {
            this.wait(60000); // 1 min
          }
          catch (InterruptedException e)
          {
            continue;
          }
        }
        this.notifyAll();
      }
    }
  }
 
  public synchronized int getCurrentBatchSize()
  {
    return _currentBatchSize;
  }
 
  @Override
  protected synchronized void processBatch()
  {
    RAMSearchIndex<R,V> readOnlyMemIndex = null;
    long now = System.currentTimeMillis();
    long duration = now - _lastFlushTime;
    int eventCount = 0;
    while(_currentBatchSize < _batchSize && !_stop && !_flush && duration < _delay)
    {
      try
      {
        wait(_delay - duration);
      }
      catch (InterruptedException e)
      {
        log.warn(e.getMessage());
      }
      now = System.currentTimeMillis();
      duration = now - _lastFlushTime;
    }
    _flush = false;
    _lastFlushTime = now;

    if (_currentBatchSize > 0)
    {
      // change the status and get the read only memory index
      // this has to be done in the block synchronized on CopyingBatchIndexDataLoader
      _idxMgr.setDiskIndexerStatus(SearchIndexManager.Status.Working);
      readOnlyMemIndex = _idxMgr.getCurrentReadOnlyMemoryIndex();
      eventCount = _currentBatchSize;
      _currentBatchSize = 0;
    }

    if (eventCount > 0)
    {
      long t1=System.currentTimeMillis();
      try
      {
        if(readOnlyMemIndex != null)
          _luceneDataLoader.loadFromIndex(readOnlyMemIndex);
      }
      catch (ZoieException e)
      {
        ZoieHealth.setFatal();
        log.error(e.getMessage(),e);
      }
      finally
      {
        long t2=System.currentTimeMillis();
        _eventCount -= eventCount;
        int segmentCount = -1;
        String segmentInfo="";
        try
        {
          segmentCount = _idxMgr.getDiskSegmentCount();
          segmentInfo = _idxMgr.getDiskSegmentInfo();
        } catch (IOException e)
        {
          log.error("error getting new segment count after disk flush", e);
        }
        log.info("flushed batch of "+eventCount+" events to disk indexer, took: "+(t2-t1)+" current event count: "+_eventCount + ", current disk segment count: " + segmentCount);
        log.info("post-flush segment info: " + segmentInfo);
        IndexUpdatedEvent evt = new IndexUpdatedEvent(eventCount,t1,t2,_eventCount);
        fireIndexingEvent(evt);
        notifyAll();
      }
    }
    else
    {
      if (log.isDebugEnabled())
      {
        log.debug("batch size is 0");
      }
    }
  }
}
TOP

Related Classes of proj.zoie.impl.indexing.internal.RealtimeIndexDataLoader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Inc. Contact coftware#gmail.com.