Package com.linkedin.databus.client

Source Code of com.linkedin.databus.client.RelayPullThread

package com.linkedin.databus.client;
/*
*
* Copyright 2013 LinkedIn Corp. All rights reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/

import java.io.ByteArrayInputStream;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;
import java.util.Set;

import javax.management.MBeanServer;

import org.apache.log4j.Logger;

import com.linkedin.databus.client.ConnectionState.StateId;
import com.linkedin.databus.client.netty.RemoteExceptionHandler;
import com.linkedin.databus.client.pub.ServerInfo;
import com.linkedin.databus.client.pub.mbean.UnifiedClientStats;
import com.linkedin.databus.core.Checkpoint;
import com.linkedin.databus.core.CheckpointMult;
import com.linkedin.databus.core.DatabusComponentStatus;
import com.linkedin.databus.core.DbusClientMode;
import com.linkedin.databus.core.DbusEvent;
import com.linkedin.databus.core.DbusEventBuffer;
import com.linkedin.databus.core.DbusEventFactory;
import com.linkedin.databus.core.InvalidEventException;
import com.linkedin.databus.core.PendingEventTooLargeException;
import com.linkedin.databus.core.PullerRetriesExhaustedException;
import com.linkedin.databus.core.SCNRegressMessage;
import com.linkedin.databus.core.ScnNotFoundException;
import com.linkedin.databus.core.async.LifecycleMessage;
import com.linkedin.databus.core.data_model.DatabusSubscription;
import com.linkedin.databus.core.data_model.LogicalSource;
import com.linkedin.databus.core.data_model.LogicalSourceId;
import com.linkedin.databus.core.data_model.PhysicalPartition;
import com.linkedin.databus.core.monitoring.mbean.DbusEventsStatisticsCollector;
import com.linkedin.databus.core.util.IdNamePair;
import com.linkedin.databus2.core.BackoffTimer;
import com.linkedin.databus2.core.BackoffTimerStaticConfig;
import com.linkedin.databus2.core.DatabusException;
import com.linkedin.databus2.core.container.monitoring.mbean.HttpStatisticsCollector;
import com.linkedin.databus2.core.container.request.BootstrapDatabaseTooOldException;
import com.linkedin.databus2.core.container.request.RegisterResponseEntry;
import com.linkedin.databus2.core.filter.DbusKeyCompositeFilter;
import com.linkedin.databus2.core.filter.DbusKeyCompositeFilterConfig;
import com.linkedin.databus2.core.filter.KeyFilterConfigHolder;

public class RelayPullThread extends BasePullThread
{
  private final static int RELAY_CALLS_MERGE_FREQ = 10;
  private static final ArrayList<RegisterResponseEntry> EMPTY_REGISTER_LIST =
      new ArrayList<RegisterResponseEntry>();
  private static final ArrayList<Integer> EMPTY_STREAM_LIST =
      new ArrayList<Integer>();

  private final boolean _isConsumeCurrent ;
  private final HttpStatisticsCollector _relayCallsStats;

  //Keep track of the last open relay connection so that we can close it on shutdown
  private DatabusRelayConnection _lastOpenConnection;

  private DbusKeyCompositeFilter _relayFilter;
  private final List<DbusKeyCompositeFilterConfig> _relayFilterConfigs;

  private int _unmergedHttpCallsStats = 0;
  private long _streamCallStartMs = 0;

  // if that much time passes with no events - reset the connection, and pick another server
  private long _noEventsConnectionResetTimeSec;
  private long _timeSinceEventsSec = System.currentTimeMillis();

  /**
   * When RelayPullThread  falls-off the relay, it goes to the Fell-Off mode and the below retryConfig (_retriesOnFallOff)
   * control the retries. The fell-off mode is also set when the client starts with a bootstrap-ckpt.
   *
   * <pre>
   * The fell-off mode will only be reset after
   *   (a) Bootstrap got completed
   *   (b) streamFromLastSCN gets triggered.
   *   (c) Server-Set Change which results in tearing the current connection
   *
   *
   *   Behavior:
   *
   *   (a) When bootstrap enabled,
   *          On first SCNNotFoundException,
   *            1 Fell-Off mode is set.
   *            2. In Pick_SERVER state, RelayPuller will re-establish the connection (hopefully to new relay in case of VIPS) and retry depending upon the retry-config.
   *            3. On subsequent retry, any failure could happen (SCNNotFound or ConnClose or any other). All these will still happen in fell-off mode and the _retriesOnFallOff config will control retries.
   *            4. If the _retriesOnFallOff gets exhausted, will go to bootstrap mode and reset the _retriesOnFallOff counter.
   *            5. When bootstrap gets completed, Fell_Off mode is reset
   *
   *   (b) When bootstrap disabled and streamFromLastSCNonFellOff set
   *            1 Fell-Off mode is set.
   *            2. In Pick_SERVER state, RelayPuller will re-establish the connection (hopefully to new relay in case of VIPS) and retry depending upon the retry-config.
   *            3. On subsequent retry, any failure could happen (SCNNotFound or ConnClose or any other). All these will still happen in fell-off mode and the _retriesOnFallOff config will control retries.
   *            4. If the _retriesOnFallOff gets exhausted, will set the flag to make current relay stream from the latest SCN and resets fell-Off mode
   *
   *   (c) When bootstrap disabled and  streamFromLastSCNonFellOff not set
   *            1. Fell-Off mode is set.
   *            2. In Pick_SERVER state, RelayPuller will re-establish the connection (hopefully to new relay in case of VIPS) and retry depending upon the retry-config.
   *            3. On subsequent retry, any failure could happen (SCNNotFound or ConnClose or any other). All these will still happen in fell-off mode and the _retriesOnFallOff config will control retries.
   *            4. If the _retriesOnFallOff gets exhausted, the relay-pull thread gets suspended.
   * </pre>
   */
  private final BackoffTimer _retriesOnFallOff;

  final private RemoteExceptionHandler _remoteExceptionHandler;
  private final boolean _isReadLatestScnOnErrorEnabled;
  private final double _pullerBufferUtilizationPct ;



  public RelayPullThread(String name,
                         DatabusSourcesConnection sourcesConn,
                         DbusEventBuffer dbusEventBuffer,
                         ConnectionStateFactory connStateFactory,
                         Set<ServerInfo> relays,
                         List<DbusKeyCompositeFilterConfig> relayFilterConfigs,
                         boolean isConsumeCurrent,
                         boolean isReadLatestScnOnErrorEnabled,
                         double pullerBufferUtilPct,
                         int noEventsConnectionResetTimeSec,
                         MBeanServer mbeanServer,
                         DbusEventFactory eventFactory,
                         Logger log)
  {
    super(name, sourcesConn.getConnectionConfig().getPullerRetries(), sourcesConn, dbusEventBuffer,
          connStateFactory, relays, mbeanServer, eventFactory, log);
    _relayFilterConfigs = relayFilterConfigs;
    _isConsumeCurrent = isConsumeCurrent;
    _remoteExceptionHandler = new RemoteExceptionHandler(sourcesConn, dbusEventBuffer, eventFactory);
    _relayCallsStats = _sourcesConn.getLocalRelayCallsStatsCollector();
    _isReadLatestScnOnErrorEnabled = isReadLatestScnOnErrorEnabled;
    _pullerBufferUtilizationPct = pullerBufferUtilPct;
    _retriesOnFallOff = new BackoffTimer("RetriesOnFallOff",
        new BackoffTimerStaticConfig(0, 0, 1, 0, sourcesConn.getConnectionConfig().getNumRetriesOnFallOff()));
    _noEventsConnectionResetTimeSec = noEventsConnectionResetTimeSec;
  }


  @Override
  protected void onResume()
  {
    _currentState.switchToPickServer();
    enqueueMessage(_currentState);
  }

  @Override
  protected void onShutdown()
  {
    if (null != _lastOpenConnection)
    {
      _log.info("closing open connection during onShutdown()");
      _lastOpenConnection.close();
      _lastOpenConnection = null;
      _log.info("Closed open connection during onShutdown()");
    }
  }

  /**
   * Invoked when a LifeCycle message of type "SUSPEND_ON_ERROR" is received by relay puller thread
   * as defined in AbstractActorMessageQueue#executeAndChangeState(Object)
   *
   * 1. Invoke same method in super class
   * 2. Send an "I'm dead" heartbeat value as a failsafe
   */
  @Override
  protected void doSuspendOnError(LifecycleMessage lcMessage)
  {
    super.doSuspendOnError(lcMessage);
    sendHeartbeat(_sourcesConn.getUnifiedClientStats(), -1);
  }

  @Override
  protected boolean shouldDelayTearConnection(StateId stateId)
  {
    boolean delayTear = false;
    switch(stateId)
    {
      case SOURCES_REQUEST_SENT:
      case SOURCES_RESPONSE_SUCCESS:
      case SOURCES_RESPONSE_ERROR:
      case SOURCES_REQUEST_ERROR:
      case REGISTER_REQUEST_SENT :
      case REGISTER_RESPONSE_SUCCESS:
      case REGISTER_RESPONSE_ERROR:
      case REGISTER_REQUEST_ERROR:
      case STREAM_REQUEST_SENT:
      case STREAM_REQUEST_SUCCESS:
      case STREAM_RESPONSE_ERROR:
      case STREAM_REQUEST_ERROR:
      case BOOTSTRAP_REQUESTED:
        delayTear = true;
        break;
      default:
        delayTear = false;
        break;
    }

    return delayTear;
  }

  @Override
  protected boolean executeAndChangeState(Object message)
  {
    boolean success = true;

    if (message instanceof ConnectionStateMessage)
    {
      if (_componentStatus.getStatus() != DatabusComponentStatus.Status.RUNNING)
      {
        _log.warn(" not running: " + message.toString());
      }
      else
      {
        ConnectionStateMessage stateMsg = (ConnectionStateMessage)message;
        ConnectionState currentState = stateMsg.getConnState();

        switch (stateMsg.getStateId())
        {
          case INITIAL: break;
          case CLOSED: shutdown(); break;
          case PICK_SERVER: doPickRelay(currentState); break;
          case REQUEST_SOURCES: doRequestSources(currentState); break;
          case SOURCES_RESPONSE_SUCCESS: doSourcesResponseSuccess(currentState); break;
          case REQUEST_REGISTER: doRequestRegister(currentState); break;
          case REGISTER_RESPONSE_SUCCESS: doRegisterResponseSuccess(currentState); break;
          case BOOTSTRAP: doBootstrap(currentState); break;
          case REQUEST_STREAM: doRequestStream(currentState); break;
          case STREAM_REQUEST_SUCCESS: doReadDataEvents(currentState); break;
          case STREAM_RESPONSE_DONE: doStreamResponseDone(currentState); break;
          case SOURCES_REQUEST_ERROR: processSourcesRequestError(currentState); break;
          case SOURCES_RESPONSE_ERROR: processSourcesResponseError(currentState); break;
          case REGISTER_REQUEST_ERROR: processRegisterRequestError(currentState); break;
          case REGISTER_RESPONSE_ERROR: processRegisterResponseError(currentState); break;
          case STREAM_REQUEST_ERROR: processStreamRequestError(currentState); break;
          case STREAM_RESPONSE_ERROR: processStreamResponseError(currentState); break;
          default:
          {
            _log.error("Unknown state in RelayPullThread: " + currentState.getStateId());
            success = false;
            break;
          }
        }
      }
    }
    else if (message instanceof BootstrapResultMessage)
    {
      BootstrapResultMessage bootstrapResultMessage = (BootstrapResultMessage)message;

      switch (bootstrapResultMessage.getTypeId())
      {
        case BOOTSTRAP_COMPLETE: doBootstrapComplete(bootstrapResultMessage); break;
        case BOOTSTRAP_FAILED: doBootstrapFailed(bootstrapResultMessage); break;
        default:
        {
          _log.error("Unknown BootstrapResultChangeMessage in RelayPullThread: " +
                    bootstrapResultMessage.getTypeId());
          success = false;
          break;
        }
      }
    }
    else
    {
      success = super.executeAndChangeState(message);
    }

    return success;
  }

  private void doBootstrapFailed(BootstrapResultMessage bootstrapResultMessage)
  {
    _log.error("bootstrap failed", bootstrapResultMessage.getFailureReason());
    if (toTearConnAfterHandlingResponse())
    {
      tearConnectionAndEnqueuePickServer();
    } else {
      _currentState.switchToBootstrap(_currentState.getCheckpoint());
      enqueueMessage(_currentState);
    }
  }

  private void doBootstrapComplete(BootstrapResultMessage bootstrapResultMessage)
  {
    Checkpoint cp = bootstrapResultMessage.getBootstrapCheckpoint();

    // we must have persisted some checkpoint at the end of the bootstrap phase
    if (null == cp)
    {
      bootstrapResultMessage.switchToBootstrapFailed(
          new RuntimeException("No persistent checkpoint found at the end of bootstrap!"));
      doBootstrapFailed(bootstrapResultMessage);
    }
    else
    {
      _currentState.setRelayFellOff(false);

      DbusClientMode consumptionMode = cp.getConsumptionMode();

      _log.info("Bootstrap completed: " +
               "Consumption Mode=" + consumptionMode +
               " startScn=" + cp.getBootstrapStartScn() +
               " targetScn=" + cp.getBootstrapTargetScn() +
               " sinceScn=" + cp.getBootstrapSinceScn() +
               " windowScn=" + cp.getWindowScn());

      UnifiedClientStats unifiedClientStats = _sourcesConn.getUnifiedClientStats();
      if (unifiedClientStats != null)
      {
        // should always be false, but just in case of weird edge cases:
        boolean isBootstrapping = (consumptionMode == DbusClientMode.BOOTSTRAP_SNAPSHOT ||
                                   consumptionMode == DbusClientMode.BOOTSTRAP_CATCHUP);
        unifiedClientStats.setBootstrappingState(isBootstrapping);
      }

      cp.resetBootstrap(); // clear Bootstrap scns for future bootstraps

      if ( toTearConnAfterHandlingResponse())
      {
        tearConnectionAndEnqueuePickServer();
      } else {
        _currentState.switchToRequestStream(cp);
        enqueueMessage(_currentState);
      }
    }
  }

  @Override
  protected void doStart(LifecycleMessage lcMessage)
  {
    _log.info("Relay Puller doStart ");

    if (_currentState.getStateId() != ConnectionState.StateId.INITIAL)
    {
      return;
    }
    super.doStart(lcMessage);

    _currentState.switchToPickServer();
    enqueueMessage(_currentState);
  }

  protected void doPickRelay(ConnectionState curState)
  {
    int serversNum = _servers.size();
    if (0 == serversNum)
    {
      enqueueMessage(LifecycleMessage.createSuspendOnErroMessage(
          new DatabusException("No relays specified")));
      return;
    }

    Random rng = new Random();
    DatabusRelayConnection relayConn = null;
    ServerInfo serverInfo = null;

    int retriesLeft;
    BackoffTimer originalCounter =_status.getRetriesCounter();

    if( curState.isRelayFellOff())
      _status.setRetriesCounter(_retriesOnFallOff);

    while (null == relayConn && (retriesLeft = _status.getRetriesLeft()) >= 0 && !checkForShutdownRequest())
    {
      _log.info("picking a relay; retries left:" + retriesLeft +
                ", Backoff Timer :" + _status.getRetriesCounter() +
                ", Are we retrying because of SCNNotFoundException : " + curState.isRelayFellOff());

      backoffOnPullError();

      _curServerIdx =  (_curServerIdx < 0) ? rng.nextInt(serversNum)
                                           : (_curServerIdx + 1) % serversNum;

      Iterator<ServerInfo> setIter = _servers.iterator();
      for (int i = 0; i <= _curServerIdx; ++i) serverInfo = setIter.next();

      try
      {
        relayConn = _sourcesConn.getRelayConnFactory().createRelayConnection(
            serverInfo, this, _remoteExceptionHandler);
        _log.info("picked a relay:" + serverInfo.toSimpleString());
      }
      catch (Exception e)
      {
        _log.error("Unable to get connection to relay:" + serverInfo.toSimpleString(), e);
      }
    }

    _status.setRetriesCounter(originalCounter);

    if (!checkForShutdownRequest())
    {
      _curServer = serverInfo;

      if (null == relayConn)
      {
        if (_currentState.isRelayFellOff())
        {
          boolean enqueueMessage = false;

          try
          {
            enqueueMessage = onRelayFellOff(curState,
                              curState.getCheckpoint(),
                              new ScnNotFoundException("Retries on SCNNotFoundException exhausted !!"));
          } catch (InterruptedException ie) {
                _log.error("interrupted while processing onRelayFellOff", ie);
                curState.switchToPickServer();
                enqueueMessage(curState);
          } catch (InvalidEventException e) {
                _log.error("error trying to notify dispatcher of bootstrapping :" + e.getMessage(), e);
                curState.switchToPickServer();
                enqueueMessage(curState);
          }

          if ( enqueueMessage )
            enqueueMessage(curState);
        } else {
          // There are no retries left. Invoke an onError callback
          try
          {
            _log.info("Puller retries exhausted. Injecting an error event on dispatcher queue to invoke onError callback");
            _remoteExceptionHandler.handleException(new PullerRetriesExhaustedException());
          } catch (InterruptedException ie){
            _log.error("Interrupted while processing retries exhausted", ie);
          } catch (InvalidEventException e) {
            _log.error("Error trying to notify dispatcher of puller retries getting exhausted", e);
          }
          _log.error("Cannot find a relay");
        }
      }
      else
      {
        DatabusRelayConnection oldRelayConn = curState.getRelayConnection();

        if ( null != oldRelayConn)
        {
          resetConnectionAndSetFlag();
        }

        sendHeartbeat(_sourcesConn.getUnifiedClientStats());

        _log.info("Relay Puller switching to request sources");
        curState.switchToRequestSources(serverInfo, serverInfo.getAddress(), relayConn);
        _lastOpenConnection = relayConn;
        enqueueMessage(curState);
      }
    }
  }

  protected void doRequestSources(final ConnectionState curState)
  {
    if (null != _relayCallsStats) _relayCallsStats.registerSourcesCall();
    _log.debug("Sending /sources request");
    curState.switchToSourcesRequestSent();
    sendHeartbeat(_sourcesConn.getUnifiedClientStats());
    curState.getRelayConnection().requestSources(curState);
  }

  /**
   * Build the string of subs to be sent in the register call.
   * */
  private String buildSubsList(List<DatabusSubscription> subs, Map<String, IdNamePair> sourceNameMap) {
    StringBuilder sb = new StringBuilder(128);
    sb.append("[");
    boolean first = true;
    for(DatabusSubscription sub :subs) {
      if(!first)
        sb.append(',');
      DatabusSubscription realSub = sub;
      LogicalSource ls = sub.getLogicalSource();
      if (!ls.idKnown() && !ls.isWildcard())
      {
        IdNamePair sourceEntry = sourceNameMap.get(ls.getName());
        if (null == sourceEntry)
        {
          //this should never happen
          throw new RuntimeException("FATAL! unable to find logical source " + ls.getName() + " in "
                                     + sourceNameMap);
        }
        realSub = new DatabusSubscription(
            sub.getPhysicalSource(), sub.getPhysicalPartition(),
            new LogicalSourceId(new LogicalSource(sourceEntry.getId().intValue(), ls.getName()),
                                sub.getLogicalPartition().getId()));
      }
      sb.append(realSub.toJsonString());
      first = false;
    }
    sb.append("]");
    return sb.toString();
  }

  protected void doSourcesResponseSuccess(ConnectionState curState)
  {

    mergeRelayCallsStats();

    Map<String, IdNamePair> sourceNameMap = curState.getSourcesNameMap();
    StringBuilder sb = new StringBuilder();
    boolean firstSource = true;
    boolean error = false;
    List<IdNamePair> sourcesList = new ArrayList<IdNamePair>(_sourcesConn.getSourcesNames().size());
    if (curState.getRelayConnection().getProtocolVersion() < 3)
    {
      // Generate the list of sources for the sources= parameter to the /stream call.
      // This is needed only for version 2 of the protocol.  For version 3 and higher,
      // we pass a list of subscriptions.
      for (String sourceName: _sourcesConn.getSourcesNames())
      {
        IdNamePair source = sourceNameMap.get(sourceName);
        if (null == source)
        {
          _log.error("Source not found on server: " + sourceName);
          error = true;
          break;
        }
        else
        {
          if (! firstSource) sb.append(',');
          sb.append(source.getId().toString());
          firstSource = false;
          sourcesList.add(source);
        }
      }
    }
    String sourcesIdList = sb.toString();
    String subsString = error ? "ERROR" : buildSubsList(curState.getSubscriptions(), sourceNameMap);
    if (_log.isDebugEnabled())
    {
      _log.debug("Source ids: " + sourcesIdList);
      _log.debug("Subs : " + subsString);
    }

    _sourcesConn.getRelayDispatcher().enqueueMessage(
        SourcesMessage.createSetSourcesIdsMessage(curState.getSources(), sourcesIdList));

    if (toTearConnAfterHandlingResponse())
    {
      tearConnectionAndEnqueuePickServer();
    } else {
      if (error)
      {
        curState.switchToPickServer();
      }
      else
      {
        curState.switchToRequestSourcesSchemas(sourcesIdList, subsString);

        if (_sourcesConn.isBootstrapEnabled())
        {
          _sourcesConn.getBootstrapPuller().enqueueMessage(
              SourcesMessage.createSetSourcesIdsMessage(curState.getSources(),
                                                        curState.getSourcesIdListString()));
        }
        String hostHdr = curState.getHostName(), svcHdr = curState.getSvcName();
        _log.info("Connected to relay " + hostHdr + " with a service identifier " + svcHdr);
      }
      enqueueMessage(curState);
    }
  }

  protected void doRequestRegister(ConnectionState curState)
  {
    if (null != _relayCallsStats) _relayCallsStats.registerRegisterCall(EMPTY_REGISTER_LIST);
    _log.debug("Sending /sources request");
    curState.swichToRegisterRequestSent();
    sendHeartbeat(_sourcesConn.getUnifiedClientStats());
    curState.getRelayConnection().requestRegister(curState.getSourcesIdListString(), curState);
  }

  protected void doRegisterResponseSuccess(ConnectionState curState)
  {
    boolean enqueueMessage = true;
    mergeRelayCallsStats();

    if(curState.getSourcesSchemas().size() < _sourcesConn.getSourcesNames().size())
    {
      _log.error("Expected " + _sourcesConn.getSourcesNames().size() + " schemas, got: " +
                curState.getSourcesSchemas().size());
      curState.switchToPickServer();
    }
    else
    {
      _sourcesConn.getRelayDispatcher().enqueueMessage(
          SourcesMessage.createSetSourcesSchemasMessage(
              curState.getSourcesSchemas(),
              curState.getMetadataSchemas()
              )
          );

      // Determine the checkpoint for read events in the following order
      // 1. Existing checkpoint in the current state
      // 2. Checkpoint persisted on disk
      // 3. New checkpoint

      Checkpoint cp = _currentState.getCheckpoint();

      if (null == cp)
      {
        _log.info("no existing checkpoint found; attempting to load persistent checkpoint");
        cp = _sourcesConn.loadPersistentCheckpoint();
      }

      if (null == cp)
      {
        _log.info(getName() + ": no checkpoint found");
        cp = new Checkpoint();

        // set the mode to streaming first so relay will inspect the scn
        cp.setConsumptionMode(DbusClientMode.ONLINE_CONSUMPTION);

        // setting windowScn makes server to return scn not found error
        cp.setWindowScn(0L);
        cp.clearBootstrapStartScn();

        if (_isConsumeCurrent)
        {
          cp.setFlexible();
          _log.info("Setting flexible checkpoint: consumeCurrent is true");
        }
      }
      else
      {
        _log.info("persisted checkpoint loaded: " + cp.toString());
      }

      if ( cp.getFlexible())
        curState.setFlexibleCheckpointRequest(true);

      if (toTearConnAfterHandlingResponse())
      {
        tearConnectionAndEnqueuePickServer();
        enqueueMessage = false;
      }
      else
      {
        if (_sourcesConn.isBootstrapEnabled())
        {
          _sourcesConn.getBootstrapPuller().enqueueMessage(
              SourcesMessage.createSetSourcesSchemasMessage(
                  curState.getSourcesSchemas(), curState.getMetadataSchemas()));
        }

        if (DbusClientMode.BOOTSTRAP_SNAPSHOT == cp.getConsumptionMode() ||
            DbusClientMode.BOOTSTRAP_CATCHUP == cp.getConsumptionMode())
        {
          curState.setRelayFellOff(true);

          if (_sourcesConn.isBootstrapEnabled())
          {
            curState.switchToBootstrap(cp);
          }
          else
          {
            String message = "bootstrap checkpoint found but bootstrapping is disabled:" + cp;
            _log.error(message);
            _status.suspendOnError(new DatabusException(message));
            enqueueMessage = false;
          }
        }
        else
        {
          if (cp.getWindowOffset() > 0)
          {
            // switched when in middle of Window
            _log.info("RelayPuller reconnecting when in middle of event window. Will regress. Current Checkpoint :" + cp);
            if (cp.getPrevScn() > 0)
            {
              cp.setWindowScn(cp.getPrevScn());
              cp.setWindowOffset(-1);
              curState.setSCNRegress(true);
            } else if (curState.isFlexibleCheckpointRequest()) {
              _log.info("Switched relays after reading partial window with flexible checkpoint !!");
              cp.setFlexible();
                curState.setSCNRegress(true);
              } else {
              _log.fatal("Checkpoint does not have prevSCN !!. Suspending !! Checkpoint :" + cp);
              enqueueMessage(LifecycleMessage.createSuspendOnErroMessage(
                  new Exception("Checkpoint does not have prevSCN !!. Suspending !! Checkpoint :" + cp)));
              enqueueMessage = false;
            }
          }

          if ( enqueueMessage )
            curState.switchToRequestStream(cp);
        }
      }
    }

    if ( enqueueMessage)
      enqueueMessage(curState);
  }

  protected void doRequestStream(ConnectionState curState)
  {
    boolean debugEnabled = _log.isDebugEnabled();

    if (debugEnabled) _log.debug("Checking for free space in buffer");
    int freeBufferThreshold=(int)(_sourcesConn.getConnectionConfig().getFreeBufferThreshold() *
        100.0 / _pullerBufferUtilizationPct);
    try
    {
      curState.getDataEventsBuffer().waitForFreeSpace(freeBufferThreshold);
    }
    catch (InterruptedException ie)
    {
      //loop
      enqueueMessage(curState);
      return;
    }

    Checkpoint cp = curState.getCheckpoint();
    if (debugEnabled) _log.debug("Checkpoint at RequestDataEvents: " + cp.toString());

    if ( null == _relayFilter)
    {
      if (debugEnabled) _log.debug("Initializing relay filter config");
      _relayFilter = new DbusKeyCompositeFilter();
      Map<String, IdNamePair> srcNameIdMap = curState.getSourcesNameMap();

      for (DbusKeyCompositeFilterConfig conf : _relayFilterConfigs)
      {
        Map<String, KeyFilterConfigHolder> cMap = conf.getConfigMap();

        Map<Long, KeyFilterConfigHolder> fConfMap = new HashMap<Long, KeyFilterConfigHolder>();
        for ( Entry<String, KeyFilterConfigHolder> e : cMap.entrySet())
        {
          IdNamePair idName = srcNameIdMap.get(e.getKey());

          if ( null != idName)
          {
            fConfMap.put(idName.getId(),e.getValue());
          }
        }
        if (debugEnabled) _log.debug("FilterConfMap is :" + fConfMap);
        _relayFilter.merge(new DbusKeyCompositeFilter(fConfMap));
      }

      if (debugEnabled)
        _log.debug("Merged Filter (before deduping) is :" + _relayFilter);
      _relayFilter.dedupe();
      if (debugEnabled)
        _log.debug("Merged Filter (after deduping) is :" + _relayFilter);
    }

    _streamCallStartMs = System.currentTimeMillis();
    if (null != _relayCallsStats)
      _relayCallsStats.registerStreamRequest(cp, EMPTY_STREAM_LIST);
    int fetchSize = (int)((curState.getDataEventsBuffer().getBufferFreeReadSpace() / 100.0) * _pullerBufferUtilizationPct);
    fetchSize = Math.max(freeBufferThreshold, fetchSize);
    CheckpointMult cpMult = new CheckpointMult();
    String args;
    if (curState.getRelayConnection().getProtocolVersion() >= 3) {
      // for version 3 and higher we pass subscriptions
      args = curState.getSubsListString();
      for (DatabusSubscription sub : curState.getSubscriptions()) {
        PhysicalPartition p = sub.getPhysicalPartition();
        cpMult.addCheckpoint(p, cp);
      }
    } else {
      args = curState.getSourcesIdListString();
      cpMult.addCheckpoint(PhysicalPartition.ANY_PHYSICAL_PARTITION, cp);
    }

    curState.switchToStreamRequestSent();
    sendHeartbeat(_sourcesConn.getUnifiedClientStats());
    curState.getRelayConnection().requestStream(
        args,
        _relayFilter,
        fetchSize,
        cpMult,
        _sourcesConn.getConnectionConfig().getKeyRange(),
        curState);
  }

  protected void doReadDataEvents(ConnectionState curState)
  {
    boolean debugEnabled = _log.isDebugEnabled();
    boolean enqueueMessage = true;
    try
    {
      ChunkedBodyReadableByteChannel readChannel = curState.getReadChannel();
      Checkpoint cp = curState.getCheckpoint();

      curState.setRelayFellOff(false);

      String remoteErrorName = RemoteExceptionHandler.getExceptionName(readChannel);
      Throwable knownRemoteError = _remoteExceptionHandler.getException(readChannel);

      if (null != knownRemoteError &&
          knownRemoteError instanceof ScnNotFoundException)
      {
        if (toTearConnAfterHandlingResponse())
        {
          tearConnectionAndEnqueuePickServer();
          enqueueMessage = false;
        } else {
          curState.setRelayFellOff(true);

          if ( _retriesOnFallOff.getRemainingRetriesNum() > 0 )
          {
            _log.error("Got SCNNotFoundException. Retry (" + _retriesOnFallOff.getRetriesNum() +
                       ") out of " + _retriesOnFallOff.getConfig().getMaxRetryNum());
            curState.switchToPickServer();
          } else {
            enqueueMessage = onRelayFellOff(curState, cp, knownRemoteError);
          }
        }
      }
      else if (null != remoteErrorName)
      {
        if (toTearConnAfterHandlingResponse())
        {
          tearConnectionAndEnqueuePickServer();
          enqueueMessage = false;
        } else {
        //remote processing error
        _log.error("read events error: " + RemoteExceptionHandler.getExceptionMessage(readChannel));
        curState.switchToStreamResponseError();
        }
      }
      else
      {
        /*DispatcherState dispatchState = curState.getDispatcherState();
          dispatchState.switchToDispatchEvents();
          _dispatcherThread.addNewStateBlocking(dispatchState);*/
        if (debugEnabled) _log.debug("Sending events to buffer");

        DbusEventsStatisticsCollector connCollector = _sourcesConn.getInboundEventsStatsCollector();

        if (curState.isSCNRegress())
        {
          _log.info("SCN Regress requested !! Sending a SCN Regress Message to dispatcher. Curr Ckpt :" + curState.getCheckpoint());

          DbusEvent regressEvent = getEventFactory().createSCNRegressEvent(new SCNRegressMessage(curState.getCheckpoint()));

          writeEventToRelayDispatcher(curState, regressEvent, "SCN Regress Event from ckpt :" + curState.getCheckpoint());
          curState.setSCNRegress(false);
        }

        UnifiedClientStats unifiedClientStats = _sourcesConn.getUnifiedClientStats();
        if (unifiedClientStats != null)
        {
          unifiedClientStats.setBootstrappingState(false)// failsafe:  we're definitely not bootstrapping here
          sendHeartbeat(unifiedClientStats);
        }
        int eventsNum = curState.getDataEventsBuffer().readEvents(readChannel,
                                                                  curState.getListeners(),
                                                                  connCollector);

        boolean resetConnection = false;
        if (eventsNum > 0) {
          _timeSinceEventsSec = System.currentTimeMillis();
          cp.checkPoint();
        } else {           // no need to checkpoint if nothing returned from relay
          // check how long it has been since we got some events
          if (_remoteExceptionHandler.getPendingEventSize(readChannel) >
               curState.getDataEventsBuffer().getMaxReadBufferCapacity())
          {
            // The relay had a pending event that we can never accommodate. This is fatal error.
            String err = "ReadBuffer max capacity(" + curState.getDataEventsBuffer().getMaxReadBufferCapacity() +
                      ") is less than event size(" +
                      _remoteExceptionHandler.getPendingEventSize(readChannel) +
                      "). Increase databus.client.connectionDefaults.eventBuffer.maxEventSize and restart.";
            _log.fatal(err);
            enqueueMessage(LifecycleMessage.createSuspendOnErroMessage(new PendingEventTooLargeException(err)));
            return;
          }
          else
          {
            if(_noEventsConnectionResetTimeSec > 0) { // unless the feature is disabled (<=0)
              resetConnection = (System.currentTimeMillis() - _timeSinceEventsSec)/1000 > _noEventsConnectionResetTimeSec;
              if(resetConnection) {
                _timeSinceEventsSec = System.currentTimeMillis();
                _log.warn("about to reset connection to relay " + curState.getServerInetAddress() +
                       ", because there were no events for " + _noEventsConnectionResetTimeSec + "secs");
              }
            }
          }
        }

        if (debugEnabled) _log.debug("Events read: " + eventsNum);

        // if it has been too long since we got non-empty responses - the relay may be stuck, try to reconnect
        if (toTearConnAfterHandlingResponse() || resetConnection)
        {
          tearConnectionAndEnqueuePickServer();
          enqueueMessage = false;
        } else {
          curState.switchToStreamResponseDone();
          resetServerRetries();
        }
      }

      if ( enqueueMessage)
      enqueueMessage(curState);
    }
    catch (InterruptedException ie)
    {
      _log.warn("interrupted", ie);
      curState.switchToStreamResponseError();
      enqueueMessage(curState);
    }
    catch (InvalidEventException e)
    {
      _log.error("error reading events from server:" + e, e);
      curState.switchToStreamResponseError();
      enqueueMessage(curState);
    }
    catch (RuntimeException e)
    {
      _log.error("runtime error reading events from server: " + e, e);
      curState.switchToStreamResponseError();
      enqueueMessage(curState);
    }
  }

  protected void doStreamResponseDone(ConnectionState curState)
  {
    Checkpoint cp = curState.getCheckpoint();
    if (_log.isDebugEnabled())
    {
      _log.debug("Data events done");
      _log.debug("Checkpoint at EventsDone: " + cp.toString());
    }

    if (curState.isFlexibleCheckpointRequest()
        && (curState.getDataEventsBuffer().getSeenEndOfPeriodScn() > 0 ))
      curState.setFlexibleCheckpointRequest(false);

    if (null != _sourcesConn.getLocalRelayCallsStatsCollector())
    {
      long streamDuration = System.currentTimeMillis() - _streamCallStartMs;
      _sourcesConn.getLocalRelayCallsStatsCollector().registerStreamResponse(streamDuration);
      mergeRelayCallsStats();
    }

    _status.getRetriesCounter().sleep();

    curState.switchToRequestStream(cp);
    enqueueMessage(curState);
  }


  protected void doBootstrap(ConnectionState curState)
  {
    if (null != _lastOpenConnection)
    {
      _lastOpenConnection.close();
      _lastOpenConnection = null;
    }

    Checkpoint bootstrapCkpt = null;
    if (_sourcesConn.getBootstrapPuller() == null) {
      _log.warn("doBootstrap got called, but BootstrapPullThread is null. Is bootstrap disabled?");
      return;
    }
    try
    {
      bootstrapCkpt = curState.getCheckpoint().clone();
    }
    catch (Exception e)
    {
      String msg  = "Error copying checkpoint:" + curState.getCheckpoint();
      _log.error(msg, e);
      BootstrapResultMessage bootstrapResultMessage = BootstrapResultMessage.createBootstrapFailedMessage(e);
      doBootstrapFailed(bootstrapResultMessage);
      return;
    }

    if (!bootstrapCkpt.isBootstrapStartScnSet())
    {
      bootstrapCkpt = curState.getBstCheckpointHandler().createInitialBootstrapCheckpoint(bootstrapCkpt,
                                                                                          bootstrapCkpt.getWindowScn());
      //bootstrapCkpt.setBootstrapSinceScn(Long.valueOf(bootstrapCkpt.getWindowScn()));
    }

    _log.info("Bootstrap begin: sinceScn=" + bootstrapCkpt.getWindowScn());

    CheckpointMessage bootstrapCpMessage = CheckpointMessage.createSetCheckpointMessage(bootstrapCkpt);
    _sourcesConn.getBootstrapPuller().enqueueMessage(bootstrapCpMessage);

    try
    {
      Checkpoint cpForDispatcher = new Checkpoint(bootstrapCkpt.toString());
      cpForDispatcher.setConsumptionMode(DbusClientMode.BOOTSTRAP_SNAPSHOT);
      DbusEvent cpEvent = getEventFactory().createCheckpointEvent(cpForDispatcher);
      writeEventToRelayDispatcher(curState, cpEvent, "Control Event to start bootstrap");
      curState.switchToBootstrapRequested();
    } catch (InterruptedException ie) {
      _log.error("Got interrupted while writing control message to bootstrap !!", ie);
      enqueueMessage(LifecycleMessage.createSuspendOnErroMessage(ie));
    } catch (Exception e) {
      enqueueMessage(LifecycleMessage.createSuspendOnErroMessage(e));
      _log.error("Exception occured while switching to bootstrap: ", e);
    }

    //wait for bootstrap to finish
    /*boolean bootstrapDone = false;
    while (! bootstrapDone)
    {
      try
      {
        bootstrapPullerThread.join();
        bootstrapDone = true;
      }
      catch (InterruptedException ie) {}
    }*/
  }

  private void mergeRelayCallsStats()
  {
    if (null != _sourcesConn.getLocalRelayCallsStatsCollector() &&
        null != _sourcesConn.getRelayCallsStatsCollector() &&
        _unmergedHttpCallsStats >= RELAY_CALLS_MERGE_FREQ)
    {
      _sourcesConn.getRelayCallsStatsCollector().merge(_sourcesConn.getLocalRelayCallsStatsCollector());
      _sourcesConn.getLocalRelayCallsStatsCollector().reset();
      _unmergedHttpCallsStats = 0;
    }
    else
    {
      ++_unmergedHttpCallsStats;
    }
  }

  private void processSourcesRequestError(ConnectionState state)
  {
  _log.info("Sources Request Error");

    if (null != _relayCallsStats) _relayCallsStats.registerInvalidSourceRequest();

    if (toTearConnAfterHandlingResponse())
    {
      tearConnectionAndEnqueuePickServer();
    } else {
      state.switchToPickServer();
      enqueueMessage(state);
    }
  }

  private void processSourcesResponseError(ConnectionState state)
  {
  _log.info("Sources Response Error");

    if (null != _relayCallsStats) _relayCallsStats.registerInvalidSourceRequest();
    if (toTearConnAfterHandlingResponse())
    {
      tearConnectionAndEnqueuePickServer();
    } else {
      state.switchToPickServer();
      enqueueMessage(state);
    }
  }

  private void processRegisterRequestError(ConnectionState state)
  {
  _log.info("Register Request Error");
    if (null != _relayCallsStats) _relayCallsStats.registerInvalidRegisterCall();
    if (toTearConnAfterHandlingResponse())
    {
      tearConnectionAndEnqueuePickServer();
    } else {
      state.switchToPickServer();
      enqueueMessage(state);
    }
  }

  private void processRegisterResponseError(ConnectionState state)
  {
  _log.info("Register Response Error");

    if (null != _relayCallsStats) _relayCallsStats.registerInvalidRegisterCall();
    if (toTearConnAfterHandlingResponse())
    {
      tearConnectionAndEnqueuePickServer();
    } else {
      state.switchToPickServer();
      enqueueMessage(state);
    }
  }

  private void processStreamRequestError(ConnectionState state)
  {
  _log.info("Stream Request Error");

    if (null != _relayCallsStats) _relayCallsStats.registerInvalidStreamRequest();
    if (toTearConnAfterHandlingResponse())
    {
      tearConnectionAndEnqueuePickServer();
    } else {
      state.switchToPickServer();
      enqueueMessage(state);
    }
  }

  private void processStreamResponseError(ConnectionState state)
  {
  _log.info("Stream Response Error");

    if (null != _relayCallsStats) _relayCallsStats.registerInvalidStreamRequest();
    if (toTearConnAfterHandlingResponse())
    {
      tearConnectionAndEnqueuePickServer();
    } else {
      state.switchToPickServer();
      enqueueMessage(state);
    }
  }

  @Override
  protected void resetConnection()
  {
    DatabusServerConnection conn =  _currentState.getRelayConnection();

    if ( null != conn)
    {
      conn.close();
      _currentState.setRelayConnection(null);
    }
  }

  private boolean onRelayFellOff(ConnectionState curState, Checkpoint cp, Throwable knownRemoteError)
      throws InvalidEventException, InterruptedException
  {
    boolean enqueueMessage = true;
    _log.error("Retries on SCNNotFoundException exhausted !!");
    _retriesOnFallOff.reset();

    //need to bootstrap, suspend or read From Latest SCN
    if (!_sourcesConn.isBootstrapEnabled())
    {
      _log.error("No scn found on relay while no bootstrap services provided:");
      _log.error(" bootstrapServices=" + _sourcesConn.getBootstrapServices() +
          "; bootstrapRegistrations=" + _sourcesConn.getBootstrapRegistrations());

      if (_isReadLatestScnOnErrorEnabled)
      {
        // Read From Latest SCN on Error
        _log.error("Read Latest SCN Window on SCNNotFoundException is enabled. Will start reading from the lastest SCN Window !!");
        curState.getRelayConnection().enableReadFromLatestScn(true);
          _currentState.setRelayFellOff(false);
        curState.switchToStreamResponseDone();
      } else {
        _log.fatal("Got SCNNotFoundException but Read Latest SCN Window and bootstrap are disabled !!");
        _remoteExceptionHandler.handleException(new BootstrapDatabaseTooOldException(knownRemoteError));
        enqueueMessage = false;
      }
    } else {
      _log.info("Requested scn " + cp.getWindowScn() +
          " not found on relay; switching to bootstrap service");
      curState.switchToBootstrap(cp);
      UnifiedClientStats unifiedClientStats = _sourcesConn.getUnifiedClientStats();
      if (unifiedClientStats != null)
      {
        unifiedClientStats.setBootstrappingState(true);
      }
    }
    return enqueueMessage;
  }

  @Override
  protected void tearConnection()
  {
    _currentState.setRelayFellOff(false);
    super.tearConnection();
  }

  /**
   * allows adjusting noEvents threshold dynamically
   * @param noEventsConnectionResetTimeSec - new threshold
   */
  public void setNoEventsConnectionResetTimeSec(long noEventsConnectionResetTimeSec)
  {
    _noEventsConnectionResetTimeSec = noEventsConnectionResetTimeSec;
  }

  protected BackoffTimer getRetryonFallOff()
  {
    return _retriesOnFallOff;
  }

  private void writeEventToRelayDispatcher(ConnectionState curState, DbusEvent event, String message)
          throws InterruptedException, InvalidEventException
  {
    boolean success = false;

    // Create a infinite backoff timer that waits for maximum of 1 sec
    // for writing the control message to evb
    BackoffTimerStaticConfig timerConfig = new BackoffTimerStaticConfig(
        1, 1000, 1, 1, -1);
    BackoffTimer timer = new BackoffTimer("EVB More Space Timer",
        timerConfig);
    timer.reset();

    byte[] eventBytes = new byte[event.size()];

    _log.info("Event size: " + eventBytes.length);
    _log.info("Event:" + event.toString());

    event.getRawBytes().get(eventBytes);

    UnifiedClientStats unifiedClientStats = _sourcesConn.getUnifiedClientStats();

    while ((!success) && (timer.getRemainingRetriesNum() > 0))
    {
      ByteArrayInputStream cpIs = new ByteArrayInputStream(
          eventBytes);
      ReadableByteChannel cpRbc = Channels.newChannel(cpIs);

      sendHeartbeat(unifiedClientStats);
      int ecnt = curState.getDataEventsBuffer().readEvents(cpRbc);
      if (ecnt <= 0) {
        _log.error("Not enough spece in the event buffer to add a control message :" + message);
        boolean interrupted = !timer.backoffAndSleep();

        if (interrupted)
          throw new InterruptedException(
              "Got interrupted while waiting to write control Message to EVB : " + message);
      } else {
        _log.info("Sent a control message :" + message);
        success = true;
      }
    }
   }

  public DatabusRelayConnection getLastOpenConnection()
  {
    return _lastOpenConnection;
  }
}
TOP

Related Classes of com.linkedin.databus.client.RelayPullThread

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.