// Source code of proj.zoie.impl.indexing.internal.LuceneIndexDataLoader

package proj.zoie.impl.indexing.internal;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import it.unimi.dsi.fastutil.longs.Long2ObjectMap;
import it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import it.unimi.dsi.fastutil.longs.LongSet;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.HashMap;

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Similarity;

import proj.zoie.api.DataConsumer;
import proj.zoie.api.ZoieVersion;
import proj.zoie.api.ZoieException;
import proj.zoie.api.ZoieHealth;
import proj.zoie.api.ZoieSegmentReader;
import proj.zoie.api.indexing.ZoieIndexable;
import proj.zoie.api.indexing.ZoieIndexable.IndexingReq;

public abstract class LuceneIndexDataLoader<R extends IndexReader, V extends ZoieVersion> implements DataConsumer<ZoieIndexable,V> {
  private static final Logger log = Logger.getLogger(LuceneIndexDataLoader.class);
  protected final Analyzer _analyzer;
  protected final Similarity _similarity;
  protected final SearchIndexManager<R,V> _idxMgr;

  protected LuceneIndexDataLoader(Analyzer analyzer, Similarity similarity,SearchIndexManager<R,V> idxMgr) {
    _analyzer = analyzer;
    _similarity = similarity;
    _idxMgr=idxMgr;
  }

  protected abstract BaseSearchIndex<R,V> getSearchIndex();
 
    protected abstract void propagateDeletes(LongSet delDocs) throws IOException;
    protected abstract void commitPropagatedDeletes() throws IOException;

  /**
   * @Precondition incoming events sorted by version number
   * <br>every event in the events collection must be non-null
   *
   * @see proj.zoie.api.DataConsumer#consume(java.util.Collection)
   *
   */
  public void consume(Collection<DataEvent<ZoieIndexable,V>> events) throws ZoieException {
    int eventCount = events.size();
        if (events == null || eventCount == 0)
      return;

    BaseSearchIndex<R,V> idx = getSearchIndex();

    Long2ObjectMap<List<IndexingReq>> addList = new Long2ObjectOpenHashMap<List<IndexingReq>>();
    V version = idx.getVersion();    // current version

    LongSet delSet =new LongOpenHashSet();
   
    try {
      for(DataEvent<ZoieIndexable,V> evt : events)
      {
        if (evt == null) continue;
            //version = Math.max(version, evt.getVersion());
            version = version == null ? evt.getVersion() : (version.compareTo(evt.getVersion()) < 0 ? evt.getVersion() : version);
            // interpret and get get the indexable instance
            ZoieIndexable indexable = evt.getData();
            if (indexable == null || indexable.isSkip())
              continue;
   
            long uid = indexable.getUID();
            delSet.add(uid);
            addList.remove(uid);
        if (!indexable.isDeleted()) // update event
        {
          IndexingReq[] reqs = indexable.buildIndexingReqs();
          for (IndexingReq req : reqs) {
            if (req != null) // if doc is provided, interpret as
                      // a delete, e.g. update with
                      // nothing
            {
              Document doc = req.getDocument();
              if (doc!=null){              
                ZoieSegmentReader.fillDocumentID(doc, uid);
              }
              // add to the insert list
              List<IndexingReq> docList = addList.get(uid);
              if (docList == null) {
                docList = new LinkedList<IndexingReq>();
                addList.put(uid, docList);
              }
              docList.add(req);
            }
          }
        }
        // hao: we do not need the following few lines
        //else {
          //addList.remove(uid);
        //}
      }

      List<IndexingReq> docList = new ArrayList<IndexingReq>(addList.size());
      for (List<IndexingReq> tmpList : addList.values()) {
        docList.addAll(tmpList);
      }
      idx.updateIndex(delSet, docList, _analyzer,_similarity);
      propagateDeletes(delSet);
      synchronized(_idxMgr)
      {
         idx.refresh();
         commitPropagatedDeletes();
      }
    } catch (IOException ioe) {
      ZoieHealth.setFatal();
      log.error("Problem indexing batch: " + ioe.getMessage(), ioe);
    } finally {
      try {
        if (idx != null) {
          idx.incrementEventCount(eventCount);
          idx.setVersion(version); // update the version of the
                        // index
        }
      } catch (Exception e) // catch all exceptions, or it would screw
                  // up jobs framework
      {
        log.warn(e.getMessage());
      } finally {
        if (idx instanceof DiskSearchIndex<?,?>) {
          log.info("disk indexing requests flushed.");
        }
      }
    }
  }
 
    public void loadFromIndex(RAMSearchIndex<R,V> ramIndex) throws ZoieException
    {
      try
      {
        // hao: get disk search idx,
        BaseSearchIndex<R,V> idx = getSearchIndex();
        //hao: merge the realyOnly ram idx with the disk idx
        idx.loadFromIndex(ramIndex);
        idx.clearDeletes(); // clear old deletes as deletes are written to the lucene index
        // hao: update the disk idx reader
        idx.refresh(); // load the index reader
        idx.markDeletes(ramIndex.getDelDocs()); // inherit deletes
        idx.commitDeletes();
        idx.incrementEventCount(ramIndex.getEventsHandled());
       
        //Map<String, String> commitData = idx.getCommitData();
        //System.out.println("disk vesion from the commit data" + commitData); 
       
        //V newVersion = idx.getVersion().compareTo(ramIndex.getVersion()) < 0 ? ramIndex.getVersion(): idx.getVersion();
        V newVersion = idx.getVersion() == null ? ramIndex.getVersion() : (idx.getVersion().compareTo(ramIndex.getVersion()) < 0 ? ramIndex.getVersion(): idx.getVersion());
        idx.setVersion(newVersion);
        //System.out.println("disk verson from the signature" + newVersion.toString());       
              
        //idx.setVersion(Math.max(idx.getVersion(), ramIndex.getVersion()));
      }
      catch(IOException ioe)
      {
        ZoieHealth.setFatal();
        log.error("Problem copying segments: " + ioe.getMessage(), ioe);
        throw new ZoieException(ioe);
      }
    }
   
 
  /**
   * @return the version number of the search index.
   */
  public V getVersion()
  {
    BaseSearchIndex<R,V> idx = getSearchIndex();
    V version = null;
    if (idx != null) version = idx.getVersion();
    return version;
  }
}
// End of proj.zoie.impl.indexing.internal.LuceneIndexDataLoader.
// (Web-scrape footer removed: navigation links and a massapi.com copyright
// notice are not part of the source and would not compile.)