// Package com.senseidb.indexing

// Source Code of com.senseidb.indexing.DefaultStreamingIndexingManager$DataDispatcher

/**
* This software is licensed to you under the Apache License, Version 2.0 (the
* "Apache License").
*
* LinkedIn's contributions are made under the Apache License. If you contribute
* to the Software, the contributions will be deemed to have been made under the
* Apache License, unless you expressly indicate otherwise. Please do not make any
* contributions that would be inconsistent with the Apache License.
*
* You may obtain a copy of the Apache License at http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, this software
* distributed under the Apache License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the Apache
* License for the specific language governing permissions and limitations for the
* software governed under the Apache License.
*
* © 2012 LinkedIn Corp. All Rights Reserved.
*/
package com.senseidb.indexing;

import com.senseidb.metrics.MetricFactory;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;

import javax.management.StandardMBean;

import org.apache.commons.configuration.Configuration;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.log4j.Logger;
import org.json.JSONObject;

import proj.zoie.api.DataConsumer;
import proj.zoie.api.DataConsumer.DataEvent;
import proj.zoie.api.DataProvider;
import proj.zoie.api.Zoie;
import proj.zoie.api.ZoieException;
import proj.zoie.api.ZoieIndexReader;
import proj.zoie.impl.indexing.StreamDataProvider;
import proj.zoie.impl.indexing.ZoieConfig;
import proj.zoie.mbean.DataProviderAdmin;
import proj.zoie.mbean.DataProviderAdminMBean;

import com.browseengine.bobo.api.BoboIndexReader;
import com.senseidb.conf.SenseiSchema;
import com.senseidb.gateway.SenseiGateway;
import com.senseidb.jmx.JmxUtil;
import com.senseidb.metrics.MetricsConstants;
import com.senseidb.plugin.SenseiPluginRegistry;
import com.senseidb.search.node.SenseiIndexingManager;
import com.senseidb.search.plugin.PluggableSearchEngineManager;
import com.senseidb.util.JSONUtil.FastJSONArray;
import com.senseidb.util.JSONUtil.FastJSONObject;
import com.yammer.metrics.Metrics;
import com.yammer.metrics.core.Timer;
import com.yammer.metrics.core.Meter;
import com.yammer.metrics.core.MetricName;

public class DefaultStreamingIndexingManager implements SenseiIndexingManager<JSONObject> {

  // Class-wide log4j logger.
  private static final Logger logger = Logger.getLogger(DefaultStreamingIndexingManager.class);

  // Prefix used by the constructor to take this manager's subset of the Sensei configuration.
  public static final String CONFIG_PREFIX = "sensei.index.manager.default";

  // Config key read (without default) in initialize(); its value plus one is passed to the
  // DataDispatcher as the partition count.
  private static final String MAX_PARTITION_ID = "maxpartition.id";

  // Config key for the data provider's max events per minute (default 40000 in buildDataProvider()).
  private static final String EVTS_PER_MIN = "eventsPerMin";

  // Config key for the data provider's batch size (default 1 in buildDataProvider()).
  private static final String BATCH_SIZE = "batchSize";

  // Config key naming the event field that holds the event creation timestamp (optional).
  private static final String EVENT_CREATED_TIMESTAMP_FIELD = "eventCreatedTimestampField";

  // Metrics below are created in start() and are null until then.
  // Marked in consume() with the data provider's configured batch size.
  private Meter _providerBatchSizeMeter;
  // Marked in consume() with the data provider's event count.
  private Meter _eventMeter;
  // Marked in consume() with the size of the incoming batch.
  private Meter _updateBatchSizeMeter;
  // Marked in consume() with the summed disk index size reported by the Zoie admin MBeans.
  private Meter _indexSizeMeter;
  // System.currentTimeMillis() value recorded by consume() for index-size measurement throttling.
  private long _lastMeasureTime;
  private static final long MEASURE_INTERVAL = 1000 * 60; // 1 minute
  // Updated in DataDispatcher.reportIndexingLatency() with (now - event creation timestamp).
  private Timer _indexingLatencyTimer;

  // Built in initialize() through the gateway; stays null when no gateway is configured.
  private StreamDataProvider<JSONObject> _dataProvider;
  // Smallest version (per _versionComparator) seen by updateOldestSinceKey(); handed to the
  // gateway when building the data provider and pushed to it as the starting offset.
  private String _oldestSinceKey;
  // Name of the event field read with optLong() for latency reporting; null disables reporting.
  private String _eventCreatedTimestampField;
  private final SenseiSchema _senseiSchema;
  // Subset of the Sensei configuration under CONFIG_PREFIX.
  private final Configuration _myconfig;

  // Partition id -> Zoie instance, as supplied to initialize().
  private Map<Integer, Zoie<BoboIndexReader, JSONObject>> _zoieSystemMap;
  // Partition id -> events collected during the current consume() call, one entry per key of
  // _zoieSystemMap; each collection is replaced with a fresh list after it has been flushed.
  private final LinkedHashMap<Integer, Collection<DataEvent<JSONObject>>> _dataCollectorMap;

  // May be null, in which case no data provider is built.
  private final SenseiGateway<?> _gateway;
  private final ShardingStrategy _shardingStrategy;
  // The gateway's comparator, or ZoieConfig.DEFAULT_VERSION_COMPARATOR when there is no gateway.
  private final Comparator<String> _versionComparator;
  // May be null; every use in this class is null-checked.
  private final PluggableSearchEngineManager pluggableSearchEngineManager;
  // Stored by the constructor; not read anywhere else in this file.
  private SenseiPluginRegistry pluginRegistry;


  public DefaultStreamingIndexingManager(SenseiSchema schema,Configuration senseiConfig,
      SenseiPluginRegistry pluginRegistry, SenseiGateway<?> gateway, ShardingStrategy shardingStrategy, PluggableSearchEngineManager pluggableSearchEngineManager){
      _dataProvider = null;
    _myconfig = senseiConfig.subset(CONFIG_PREFIX);
    _eventCreatedTimestampField = _myconfig.getString(EVENT_CREATED_TIMESTAMP_FIELD, null);
     this.pluginRegistry = pluginRegistry;
    _oldestSinceKey = null;
    _senseiSchema = schema;
    _zoieSystemMap = null;
    _dataCollectorMap = new LinkedHashMap<Integer, Collection<DataEvent<JSONObject>>>();
    _gateway = gateway;
    this.pluggableSearchEngineManager = pluggableSearchEngineManager;
    if (_gateway!=null){
      _versionComparator = _gateway.getVersionComparator();
    }
    else{
      _versionComparator = ZoieConfig.DEFAULT_VERSION_COMPARATOR;
    }
    _shardingStrategy = shardingStrategy;
  }

  public void updateOldestSinceKey(String sinceKey){
      if(_oldestSinceKey == null){
        _oldestSinceKey = sinceKey;
        if (_dataProvider != null) {
          _dataProvider.setStartingOffset(_oldestSinceKey);
        }
      }
      else if(sinceKey!=null && _versionComparator.compare(sinceKey, _oldestSinceKey) <0 ){
        _oldestSinceKey = sinceKey;
        if (_dataProvider != null) {
          _dataProvider.setStartingOffset(_oldestSinceKey);
        }
      }
  }

  private Meter registerMeter(String name, String eventType) {
    return MetricFactory.newMeter(new MetricName(MetricsConstants.Domain, "meter", name, "indexing-manager"),
                                  eventType,
                                  TimeUnit.SECONDS);
  }

  private Timer registerTimer(String name)
  {
    return MetricFactory.newTimer(new MetricName(MetricsConstants.Domain, "timer", name, "indexing-manager"),
                                  TimeUnit.MILLISECONDS,
                                  TimeUnit.SECONDS);
  }

  @Override
  public void initialize(
      Map<Integer, Zoie<BoboIndexReader, JSONObject>> zoieSystemMap)
      throws Exception {

    int maxPartitionId = _myconfig.getInt(MAX_PARTITION_ID)+1;
    String uidField = _senseiSchema.getUidField();
    DataDispatcher consumer = new DataDispatcher(maxPartitionId,uidField);

    _zoieSystemMap = zoieSystemMap;

      Iterator<Integer> it = zoieSystemMap.keySet().iterator();
      while(it.hasNext()){
        int part = it.next();
        Zoie<BoboIndexReader,JSONObject> zoie = zoieSystemMap.get(part);
        updateOldestSinceKey(zoie.getVersion());
        _dataCollectorMap.put(part, new LinkedList<DataEvent<JSONObject>>());
      }

      if (pluggableSearchEngineManager != null && pluggableSearchEngineManager.getOldestVersion() != null && !("".equals(pluggableSearchEngineManager.getOldestVersion()))) {
        updateOldestSinceKey(pluggableSearchEngineManager.getOldestVersion());     
      }

      _dataProvider = buildDataProvider();

      if (_dataProvider!=null){
      _dataProvider.setDataConsumer(consumer);
      }    
  }

  @Override
  public DataProvider<JSONObject> getDataProvider()
  {
    return _dataProvider;
  }

  private StreamDataProvider<JSONObject> buildDataProvider() throws ConfigurationException{
    StreamDataProvider<JSONObject> dataProvider = null;
    if (_gateway!=null){
      try{
        dataProvider = _gateway.buildDataProvider(_senseiSchema, _oldestSinceKey,_shardingStrategy,_zoieSystemMap.keySet());
        long maxEventsPerMin = _myconfig.getLong(EVTS_PER_MIN,40000);
        dataProvider.setMaxEventsPerMinute(maxEventsPerMin);
        int batchSize = _myconfig.getInt(BATCH_SIZE,1);
        dataProvider.setBatchSize(batchSize);
       }
      catch(Exception e){
        throw new ConfigurationException(e.getMessage(),e);
      }

      try {
       StandardMBean dataProviderMbean = new StandardMBean(new DataProviderAdmin(dataProvider), DataProviderAdminMBean.class);
       JmxUtil.registerMBean(dataProviderMbean, "indexing-manager","stream-data-provider");
      } catch (Exception e) {
        logger.error(e.getMessage(),e);
      }
    }
    return dataProvider;
  }

  @Override
  public void shutdown() {
    if (pluggableSearchEngineManager != null) {
      pluggableSearchEngineManager.close();
    }
    if (_dataProvider!=null){
      _dataProvider.stop();
    }
    if (_providerBatchSizeMeter != null) {
      _providerBatchSizeMeter.stop();
    }
    if (_updateBatchSizeMeter != null) {
      _updateBatchSizeMeter.stop();
    }
    if (_indexSizeMeter != null) {
        _indexSizeMeter.stop();
    }
    if (_eventMeter != null) {
      _eventMeter.stop();
    }
  }

  @Override
  public void start() throws Exception {
    if (_dataProvider==null){
      logger.warn("no data stream configured, no indexing events are flowing.");
    }
    else{
      _providerBatchSizeMeter = registerMeter("provider-batch-size", "provide-batch-size");
      _updateBatchSizeMeter = registerMeter("update-batch-size", "update-batch-size");
      _eventMeter = registerMeter("indexing-events", "indexing-events");
      _indexSizeMeter = registerMeter("index-size", "index-size");
      _indexingLatencyTimer = registerTimer("indexing-latency");

      _dataProvider.start();
    }
  }

  @Override
  public void syncWithVersion(long timeToWait, String version) throws ZoieException
  {
    Iterator<Integer> itr = _zoieSystemMap.keySet().iterator();
    while (itr.hasNext())
    {
      int part_num = itr.next();
      Zoie<BoboIndexReader,JSONObject> dataConsumer = _zoieSystemMap.get(part_num);
      if (dataConsumer != null)
      {
        dataConsumer.syncWithVersion(timeToWait, version);
      }
    }
  }

  private class DataDispatcher implements DataConsumer<JSONObject>
  {
    // Passed to the sharding strategy when routing each event.
    int _maxPartitionId;  // the total number of partitions over all the nodes;
    // Name of the uid field; used to give skip-marker events a dummy uid.
    private final String _uidField;
    // Highest event version seen so far (per _versionComparator); null until the first event.
    private volatile String _currentVersion;

    public DataDispatcher(int maxPartitionId,String uidField){
      _maxPartitionId = maxPartitionId;
      _uidField = uidField;
      _currentVersion = null;
    }

    private void reportIndexingLatency(JSONObject obj)
    {
      if (_eventCreatedTimestampField != null)
      {
        long createdTimestamp = obj.optLong(_eventCreatedTimestampField);
        if (createdTimestamp > 0)
        {
          _indexingLatencyTimer.update(System.currentTimeMillis() - createdTimestamp,
                                      TimeUnit.MILLISECONDS);
        }
      }
    }

    private JSONObject rewriteData(JSONObject obj, int partNum)
    {
      String type = obj.optString(SenseiSchema.EVENT_TYPE_FIELD, null);

      JSONObject event = obj.optJSONObject(SenseiSchema.EVENT_FIELD);
      if (event == null)
        event = obj;
      else if (type != null)
      {
        try
        {
          event.put(SenseiSchema.EVENT_TYPE_FIELD, type);
        }
        catch(Exception e)
        {
          logger.error("Should never happen", e);
        }
      }

      reportIndexingLatency(event);

      if (SenseiSchema.EVENT_TYPE_UPDATE.equalsIgnoreCase(type))
      {
        Zoie<BoboIndexReader, JSONObject> zoie = _zoieSystemMap.get(partNum);
        List<ZoieIndexReader<BoboIndexReader>> readers;
        try
        {
          readers = zoie.getIndexReaders();
        }
        catch(Exception e)
        {
          logger.error(e.getMessage(), e);
          return null;
        }

        if (readers == null)
        {
          logger.error("Cannot found original doc for and update event: " + obj);
          return null;
        }
        try
        {
          byte[] src = null;
          long uid = Long.parseLong(event.getString(_senseiSchema.getUidField()));
          for (ZoieIndexReader<BoboIndexReader> reader : readers)
          {           
            src = reader.getStoredValue(uid);
            if (src != null)
              break;
          }         
          byte[] data = null;

          if (_senseiSchema.isCompressSrcData())
            data = DefaultJsonSchemaInterpreter.decompress(src);
          else
            data = src;

          if (data == null)
          {
            logger.error("Cannot found original doc for and update event: " + obj);
            return null;
          }

          JSONObject newEvent = new FastJSONObject(new String(data, "UTF-8"));
          Iterator<String> keys = event.keys();
          while(keys.hasNext())
          {
            String key = keys.next();
            newEvent.put(key, event.get(key));
          }
          event = newEvent;
        }
        catch (Exception e)
        {
          logger.error(e.getMessage(), e);
          return null;
        }
        finally
        {
          zoie.returnIndexReaders(readers);
        }
      }

      return event;
    }

    @Override
    public void consume(Collection<proj.zoie.api.DataConsumer.DataEvent<JSONObject>> data) throws ZoieException
    {
      _updateBatchSizeMeter.mark(data.size());
      _providerBatchSizeMeter.mark(_dataProvider.getBatchSize());
      _eventMeter.mark(_dataProvider.getEventCount());

      try{
        for(DataEvent<JSONObject> dataEvt : data){
          JSONObject obj = dataEvt.getData();

          if (obj == null) // Just ignore this event.
            continue;

          String version = dataEvt.getVersion();
          _currentVersion = (_versionComparator.compare(_currentVersion, version) < 0) ? version : _currentVersion;
          if (pluggableSearchEngineManager != null && pluggableSearchEngineManager.acceptEventsForAllPartitions()) {
            obj = pluggableSearchEngineManager.update(obj, _currentVersion);
          }

          int routeToPart = _shardingStrategy.caculateShard(_maxPartitionId, obj);
          Collection<DataEvent<JSONObject>> partDataSet = _dataCollectorMap.get(routeToPart);
          if (partDataSet != null)
          {
            JSONObject rewrited = obj;
            if (pluggableSearchEngineManager != null && !pluggableSearchEngineManager.acceptEventsForAllPartitions()) {
              rewrited = pluggableSearchEngineManager.update(obj, dataEvt.getVersion());
            }
            rewrited = rewriteData(obj, routeToPart);
            if (rewrited != null)
            {
             
              if (rewrited != obj)
                dataEvt = new DataEvent<JSONObject>(rewrited, dataEvt.getVersion(), dataEvt.getWeight());
              partDataSet.add(dataEvt);
            }
          }
        }

        long indexSize = 0;
        long now = System.currentTimeMillis();
        boolean measureIndexSize = now - _lastMeasureTime > MEASURE_INTERVAL ? true : false;
        _lastMeasureTime = now;

        Iterator<Integer> it = _zoieSystemMap.keySet().iterator();
        while(it.hasNext()){
          int part_num = it.next();
          Zoie<BoboIndexReader,JSONObject> dataConsumer = _zoieSystemMap.get(part_num);
          if (dataConsumer!=null){
            LinkedList<DataEvent<JSONObject>> partDataSet =
              (LinkedList<DataEvent<JSONObject>>) _dataCollectorMap.get(part_num);
            if (partDataSet != null)
            {
              if (partDataSet.size() == 0)
              {
                JSONObject markerObj = new FastJSONObject();
                //markerObj.put(_senseiSchema.getSkipField(), "true");
                markerObj.put(SenseiSchema.EVENT_TYPE_FIELD, SenseiSchema.EVENT_TYPE_SKIP);
                markerObj.put(_uidField, 0L); // Add a dummy uid
                partDataSet.add(new DataEvent<JSONObject>(markerObj, _currentVersion));
              }
              else if (_currentVersion != null && !_currentVersion.equals(partDataSet.getLast().getVersion()))
              {
                DataEvent<JSONObject> last = partDataSet.pollLast();
                partDataSet.add(new DataEvent<JSONObject>(last.getData(), _currentVersion, last.getWeight()));
              }
              dataConsumer.consume(partDataSet);
            }

            if (measureIndexSize)
                indexSize += dataConsumer.getAdminMBean().getDiskIndexSizeBytes();
          }
          _dataCollectorMap.put(part_num, new LinkedList<DataEvent<JSONObject>>());

          if (measureIndexSize)
            _indexSizeMeter.mark(indexSize);
        }
      }
      catch(Exception e){
        throw new ZoieException(e.getMessage(),e);
      }
    }

    @Override
    public String getVersion()
    {
      return _currentVersion;
    }

    @Override
    public Comparator<String> getVersionComparator() {
      return _versionComparator;
    }
  }
}
// TOP

// Related Classes of com.senseidb.indexing.DefaultStreamingIndexingManager$DataDispatcher

// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.