/*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
*
* Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
*
* The contents of this file are subject to the terms of either the GNU
* General Public License Version 2 only ("GPL") or the Common Development
* and Distribution License("CDDL") (collectively, the "License"). You
* may not use this file except in compliance with the License. You can obtain
* a copy of the License at https://glassfish.dev.java.net/public/CDDL+GPL.html
* or glassfish/bootstrap/legal/LICENSE.txt. See the License for the specific
* language governing permissions and limitations under the License.
*
* When distributing the software, include this License Header Notice in each
* file and include the License file at glassfish/bootstrap/legal/LICENSE.txt.
* Sun designates this particular file as subject to the "Classpath" exception
* as provided by Sun in the GPL Version 2 section of the License file that
* accompanied this code. If applicable, add the following below the License
* Header, with the fields enclosed by brackets [] replaced by your own
* identifying information: "Portions Copyrighted [year]
* [name of copyright owner]"
*
* Contributor(s):
*
* If you wish your version of this file to be governed by only the CDDL or
* only the GPL Version 2, indicate your decision by adding "[Contributor]
* elects to include this software in this distribution under the [CDDL or GPL
* Version 2] license." If you don't indicate a single choice of license, a
* recipient has the option to distribute your version of this file under
* either the CDDL, the GPL Version 2 or to extend the choice of license to
* its licensees as provided above. However, if you add GPL Version 2 code
* and therefore, elected the GPL Version 2 license, then the option applies
* only if the new code is made subject to such option by the copyright
* holder.
*/
/*
* ReplicationHealthChecker.java
*
* Created on July 19, 2006, 2:02 PM
*
*/
package com.sun.enterprise.ee.web.sessmgmt;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Level;
import java.util.logging.Logger;
import com.sun.logging.LogDomains;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.catalina.LifecycleException;
import com.sun.enterprise.web.ReplicationReceiver;
import com.sun.enterprise.web.ServerConfigLookup;
import com.sun.enterprise.config.serverbeans.Cluster;
import com.sun.enterprise.config.serverbeans.ServerRef;
import com.sun.enterprise.ee.cms.core.*;
import com.sun.enterprise.ee.cms.spi.*;
import java.util.Iterator;
/**
*
* @author Larry White
*/
public class ReplicationHealthChecker implements Runnable {
/**
* The logger to use for logging ALL web container related messages.
*/
private static final Logger _logger
= LogDomains.getLogger(LogDomains.WEB_LOGGER);
/**
* Logger obtained from ReplicationUtil for pipe-related diagnostics.
*/
private static final Logger _pipelogger = ReplicationUtil.getPipeLogger();
/**
* The GMS module for this cluster. May remain null if acquisition fails
* here; the membership query methods of this class retry the lookup
* whenever they find it null.
*/
private static GroupManagementService _gms = null;
static {
try {
_gms = GMSFactory.getGMSModule(ReplicationUtil.getClusterName());
} catch (Exception ex) {
// a failure is logged, not propagated, so class initialization still succeeds
_logger.log(Level.WARNING, "error occurred acquiring GMS", ex);
}
ReplicationUtil.configureLoadBalancer();
}
// result codes returned by determineMemberStateAfterPipeFailure(...)
static final int INSTANCE_STARTING = 0;
static final int INSTANCE_OK = 1;
static final int INSTANCE_FAILED = 2;
static final int INSTANCE_UNKNOWN = 3;
public static final int RESHAPE_MESSAGE_ARRIVAL_TIME = 120; //seconds
/**
* time of most recent join (millis); -1 until a join time has been set
*/
private static AtomicLong _mostRecentJoinTime = new AtomicLong(-1L);
/**
* time of most recent reshape (millis); -1 until a reshape time has been set
*/
private static AtomicLong _mostRecentReshapeTime = new AtomicLong(-1L);
/**
 * The flag which reflects whether replication partner is operational.
 * Declared volatile because it is written while holding the monitor but
 * read without any lock, so readers need a visibility guarantee.
 */
private static volatile boolean _replicationPartnerOperationalFlag = true;
/**
* The flag which reflects whether pipes are connected
*/
private static volatile boolean _replicationCommunicationOperationalFlag = false;
/**
* The flag which reflects whether Replication health check is enabled.
* It is populated from configuration by initializeHealthCheckEnabledFlag()
* which is invoked from start().
*/
private BooleanWrapper _healthCheckEnabledFlag = new BooleanWrapper();
/**
* The singleton instance of ReplicationHealthChecker
*/
private static final ReplicationHealthChecker _soleInstance
= new ReplicationHealthChecker();
/**
* The ReplicationReceiver registered via setReplicationReceiver();
* null until one has been registered
*/
private static ReplicationReceiver _replicationReceiver = null;
/**
* a monitor obj for synchronization
*/
private static final Object _monitor = new Object();
/**
* a monitor obj for unload thread synchronization
*/
private static final Object _unloadMonitor = new Object();
/**
* a runtime health check error has been reported once
*/
protected static boolean runtimeHealthCheckExceptionReported = false;
/**
* a runtime health failure error has been reported once
*/
protected static boolean runtimeHealthFailureErrorReported = false;
/**
* a flag to indicate instance is stopping
*/
private static final AtomicBoolean stoppingFlag = new AtomicBoolean(false);
/**
* number of sender dispatch threads running
* normally should be 2 - used to control countdownlatch
* for instance shutdown - see ReplicationLifecycleImpl
*/
private static final AtomicInteger dispatchThreadCount = new AtomicInteger(0);
/**
* a flag to indicate instance is flushing
*/
private static final AtomicBoolean flushingFlag = new AtomicBoolean(false);
/**
* a flag to indicate instance thread waiting for flush
*/
private static final AtomicBoolean flushThreadWaitingFlag = new AtomicBoolean(false);
/**
* a countdown latch used by unload logic in ReplicationLifeCycleImpl;
* created on first call to getDoneSignal()
*/
private static CountDownLatch doneSignal = null;
/**
* The sleep interval in seconds
*/
private static int _sleepIntervalSeconds = 5;
/**
* the replication health check interval in seconds;
* a non-positive value means it has not yet been read from configuration
*/
protected int replicationHealthcheckIntervalInSeconds = -1;
/**
* Has this component been started yet?
*/
protected boolean started = false;
/**
* The background thread.
*/
protected Thread thread = null;
/**
* The background thread completion semaphore.
*/
protected volatile boolean threadDone = false;
/**
* instance started time (millis); -1 until set
*/
private volatile long instanceStartTime = -1L;
/**
* Name to register for the background thread.
*/
protected String _threadName = "ReplicationHealthChecker";
/**
* Set of instance names added to cluster since
* the cluster started
*/
protected Set _extraInstanceNames = new HashSet();
/**
* Return the singleton instance.
* The instance is created when this class is initialized; this method
* only returns it and never creates a new one.
*/
public static ReplicationHealthChecker getInstance() {
return _soleInstance;
}
/**
 * Creates a new instance of ReplicationHealthChecker and assigns the
 * name used for its background thread.
 */
public ReplicationHealthChecker() {
    this._threadName = "ReplicationHealthChecker";
}
/**
 * Return the name given to the background health-check thread.
 */
public String getThreadName() {
    return this._threadName;
}
/**
 * Return the instance start time in millis, or -1 if it has not been set.
 */
public long getInstanceStartTime() {
    return this.instanceStartTime;
}
/**
 * Record the instance start time.
 * @param value the start time in millis
 */
public void setInstanceStartTime(long value) {
    this.instanceStartTime = value;
}
/**
 * Is the server started, i.e. has a start time been recorded such that
 * the elapsed time since start is non-negative.
 */
public boolean isInstanceStarted() {
    final long elapsed = getTimeSinceInstanceStart();
    return elapsed >= 0L;
}
/**
 * Has the instance started, and did it start less than
 * <code>duration</code> millis ago.
 * @param duration the window in millis
 * @return false if the instance is not started or started longer ago
 */
public boolean isTimeSinceInstanceStartLessThan(long duration) {
    return isInstanceStarted() && (getTimeSinceInstanceStart() < duration);
}
/**
 * Get the time since start time (millis).
 * @return elapsed millis, or -1 when no start time has been set
 */
public long getTimeSinceInstanceStart() {
    final long startedAt = getInstanceStartTime();
    return (startedAt == -1L) ? -1L : (System.currentTimeMillis() - startedAt);
}
/**
* Get the replication receiver previously registered through
* setReplicationReceiver(); null if none has been registered
*/
public static ReplicationReceiver getReplicationReceiver() {
return _replicationReceiver;
}
/**
 * Register the replication receiver.
 * @param replicationReceiver the receiver to hold
 */
public static void setReplicationReceiver(ReplicationReceiver replicationReceiver) {
    ReplicationHealthChecker._replicationReceiver = replicationReceiver;
}
/**
 * Register a newly started instance. The name is remembered as an
 * "extra" only when the admin view of the cluster does not already list
 * it; a unicast sender is initialized for the instance in either case.
 * @param extraInstanceName name of the instance that joined
 */
void addIfExtraInstance(String extraInstanceName) {
    final boolean knownToAdmin
        = getCurrentGroupMembersViaAdmin().contains(extraInstanceName);
    if (!knownToAdmin) {
        if (_logger.isLoggable(Level.FINEST)) {
            _logger.finest("adding extra instance to admin view extras: " + extraInstanceName);
        }
        _extraInstanceNames.add(extraInstanceName);
    }
    JxtaReplicationSender.createInstance()
        .initJxtaReplicationUnicastSenderFor(extraInstanceName);
}
/**
 * Ask the server configuration whether unicast batching is enabled.
 */
private boolean isReplicationUnicastBatchingEnabled() {
    return new ServerConfigLookup().isReplicationUnicastBatchingEnabled();
}
/**
 * Forget an instance. The name is dropped from the extras set only when
 * the admin view does not list it; the unicast sender for the instance
 * is removed in either case.
 * @param extraInstanceName name of the instance to remove
 */
protected void removeIfExtraInstance(String extraInstanceName) {
    final boolean knownToAdmin
        = getCurrentGroupMembersViaAdmin().contains(extraInstanceName);
    if (!knownToAdmin) {
        if (_logger.isLoggable(Level.FINEST)) {
            _logger.finest("removing extra instance from admin view extras: " + extraInstanceName);
        }
        _extraInstanceNames.remove(extraInstanceName);
    }
    JxtaReplicationSender.createInstance()
        .removeJxtaReplicationUnicastSenderFor(extraInstanceName);
}
/**
 * Is the JxtaBiDiPipeWrapper in the middle of attempting connection.
 * When the receiver has no pipe wrapper yet this reports true.
 */
public static boolean isAttemptingConnection() {
    final JxtaBiDiPipeWrapper pipe
        = JxtaReplicationReceiver.createInstance().getJxtaBiDiPipeWrapper();
    return (pipe == null) || pipe.isAttemptingConnection();
}
/**
 * Report whether the replication partner is currently flagged operational.
 */
public static boolean isReplicationPartnerOperational() {
    return ReplicationHealthChecker._replicationPartnerOperationalFlag;
}
/**
 * Update, under the class monitor, the flag which reflects whether the
 * replication partner is operational.
 * @param value the new flag value
 */
public static void setReplicationPartnerOperational(boolean value) {
    synchronized (_monitor) {
        ReplicationHealthChecker._replicationPartnerOperationalFlag = value;
    }
}
/**
 * Get the flag which reflects whether replication communication
 * (the pipes) is operational. Read under the class monitor.
 */
public static boolean isReplicationCommunicationOperational() {
    final boolean operational;
    synchronized (_monitor) {
        operational = _replicationCommunicationOperationalFlag;
    }
    return operational;
}
/**
 * Set the flag which reflects whether replication communication is
 * operational, reporting any state transition.
 * @param value the value
 */
public static void setReplicationCommunicationOperational(boolean value) {
    setReplicationCommunicationOperational(value, true);
}

/**
 * Set the flag which reflects whether replication communication is
 * operational.
 * <p>
 * Only the read-and-swap of the flag happens under the monitor. The
 * transition report (which performs a membership lookup and logging) runs
 * after the monitor is released so that readers of the flag are not
 * blocked behind it.
 * @param value the value
 * @param report count this as a real failure and report; else just silent setter
 */
public static void setReplicationCommunicationOperational(boolean value, boolean report) {
    final boolean previousValue;
    synchronized (_monitor) {
        previousValue = _replicationCommunicationOperationalFlag;
        _replicationCommunicationOperationalFlag = value;
    }
    if (!report || previousValue == value) {
        // silent set, or no transition to report
        return;
    }
    if (value) {
        reportReplicationHealthChange("ReplicationHealthChecker:replication health now ok: currentPartner: " + staticGetReshapeReplicateToInstanceName(null, 0L));
    } else {
        reportReplicationHealthChange("ReplicationHealthChecker:replication health failure:stopping replication");
    }
}
/**
 * Log a health failure at WARNING, at most once until reportOk() resets
 * the "already reported" latch. Nothing is logged while the instance is
 * stopping.
 * @param message the text to log
 */
public static void reportError(String message) {
    if (!isStopping()) {
        synchronized (_monitor) {
            final boolean alreadyReported = runtimeHealthFailureErrorReported;
            if (!alreadyReported) {
                runtimeHealthFailureErrorReported = true;
                _logger.warning(message);
            }
        }
    }
}
/**
 * Log (at WARNING) that health is ok again, but only if a failure had
 * been reported, and reset the "already reported" latch. Nothing happens
 * while the instance is stopping.
 * @param message the text to log
 */
public static void reportOk(String message) {
    if (!isStopping()) {
        synchronized (_monitor) {
            final boolean failureWasReported = runtimeHealthFailureErrorReported;
            if (failureWasReported) {
                _logger.warning(message);
            }
            runtimeHealthFailureErrorReported = false;
        }
    }
}
/**
 * Log a replication health transition at WARNING unless the instance is
 * deliberately in the midst of stopping.
 * @param message the text to log
 */
public static void reportReplicationHealthChange(String message) {
    if (!isStopping()) {
        _logger.warning(message);
    }
}
/**
 * Report whether pipes exist; currently this is just the cached
 * communication-operational flag.
 */
private boolean doPipesExist() {
    //FIXME may need this to be more dynamic check later
    final boolean pipesUp = isReplicationCommunicationOperational();
    return pipesUp;
}
/**
 * Can I do a successful ping on a bidi pipe.
 * @return true when the health-check transmit produced a non-null reply
 */
boolean doPipeTest() {
    final ReplicationState ping = createHealthPingState();
    ping.setAckRequired(true);
    return doTransmit(ping) != null;
}

/**
 * Can I do a successful ping on a particular bidi pipe.
 * @param pipeWrapper the PipeWrapper to test
 * @return true when the health-check transmit produced a non-null reply
 */
boolean doPipeTest(PipeWrapper pipeWrapper) {
    final ReplicationState ping = createHealthPingState(pipeWrapper.getName());
    ping.setAckRequired(true);
    return doTransmit(ping, pipeWrapper) != null;
}
/**
 * Send health message to test connectivity.
 * @param transmitState the health-check state to send
 * @return whatever the sender's health-check send returns
 */
protected ReplicationState doTransmit(ReplicationState transmitState) {
    final JxtaReplicationSender sender = JxtaReplicationSender.createInstance();
    return sender.sendReplicationStateHC(transmitState);
}

/**
 * Send health message over a specific pipe to test connectivity.
 * @param transmitState the health-check state to send
 * @param pipeWrapper the pipe to send it over
 * @return whatever the sender's health-check send returns
 */
protected ReplicationState doTransmit(ReplicationState transmitState, PipeWrapper pipeWrapper) {
    final JxtaReplicationSender sender = JxtaReplicationSender.createInstance();
    return sender.sendReplicationStateHC(transmitState, pipeWrapper);
}
/**
* Build the ReplicationState used as a health-check ping:
* web mode, HC_COMMAND, fixed id "pingtest" and no payload.
*/
ReplicationState createHealthPingState() {
return new ReplicationState(
ReplicationState.MODE_WEB, //mode
"pingtest", //id
"pingappid", //appid
0L, //version
0L, //lastAccess
0L, //maxInactiveInterval
null, //extraParam
null, //queryResult
null, //instanceName
ReplicationState.HC_COMMAND, //command
null, //state
null, //trunkState
null); //containerExtraParamsState
}
/**
* Build a health-check ping for a named pipe. The name is appended to the
* id and also passed as the extraParam.
* @param name the name to embed (doPipeTest passes the pipe wrapper's name)
*/
ReplicationState createHealthPingState(String name) {
return new ReplicationState(
ReplicationState.MODE_WEB, //mode
("pingtest" + name), //id
"pingappid", //appid
0L, //version
0L, //lastAccess
0L, //maxInactiveInterval
name, //extraParam
null, //queryResult
null, //instanceName
ReplicationState.HC_COMMAND, //command
null, //state
null, //trunkState
null); //containerExtraParamsState
}
/**
 * Return boolean reflecting whether it is ok to proceed with replication
 * processing.
 * <ul>
 * <li>flushing: always ok</li>
 * <li>cluster stopping, or a connection attempt in progress: not ok</li>
 * <li>otherwise ok only when both the partner and the communication
 * flags are operational; a failure is reported via reportError()</li>
 * </ul>
 */
public static boolean isOkToProceed() {
    // FIXME (carried over): a "health checking disabled => ok" shortcut
    // may be reinstated here later
    if (isFlushing()) {
        return true;
    }
    if (isClusterStopping() || isAttemptingConnection()) {
        return false;
    }
    final boolean healthy = isReplicationPartnerOperational()
        && isReplicationCommunicationOperational();
    if (!healthy) {
        synchronized (_monitor) {
            reportError("ReplicationHealthChecker:health failure " +
                " isReplicationPartnerOperational()=" + isReplicationPartnerOperational() +
                " isReplicationCommunicationOperational()=" + isReplicationCommunicationOperational());
        }
    }
    return healthy;
}
/**
 * Perform the Replication health check and take action as appropriate;
 * defaults to doing full (not quick) check.
 */
protected boolean doReplicationHealthCheck() {
    return doReplicationHealthCheck(false);
}

/**
 * Perform the Replication health check and take action as appropriate.
 * @param quickCheck - skip shutdown logic
 * @return true when checking is disabled or an exception has already been
 * reported; otherwise the partner-operational flag after this check
 */
protected boolean doReplicationHealthCheck(boolean quickCheck) {
    final boolean finest = _logger.isLoggable(Level.FINEST);

    //if health checking not enabled skip all checking
    if (!isHealthCheckingEnabled()) {
        if (finest) {
            _logger.finest("health check disabled - skipping");
        }
        return true;
    }
    //once a runtime exception has been reported, avoid further
    //(misleading) log messages by treating the partner as healthy
    if (runtimeHealthCheckExceptionReported) {
        return true;
    }
    if (finest) {
        _logger.finest("health check enabled - entering isReplicationPartnerOk()");
    }
    final boolean aliveNow = this.isReplicationPartnerOk();
    final boolean aliveBefore = isReplicationPartnerOperational();
    if (finest) {
        _logger.finest("ReplicationHealthCheck: replication partner is alive: " + aliveNow);
    }

    //only look at the pipes when the partner is newly healthy
    //(was down, is now reported alive); otherwise there is no point
    boolean pipesExist = true;
    final boolean newlyHealthy = !aliveBefore && aliveNow;
    if (newlyHealthy) {
        pipesExist = doPipesExist();
        if (finest) {
            _logger.finest("ReplicationHealthCheck: pipes exist: " + pipesExist);
        }
    }
    final boolean healthyWithPipes = aliveNow && pipesExist;
    if (finest) {
        _logger.finest("ReplicationHealthCheck: replica healthyWithPipes: " + healthyWithPipes);
    }
    ReplicationHealthChecker.setReplicationPartnerOperational(healthyWithPipes);
    // FIXME (carried over): a health warning should be issued here

    //cleanup only when the partner is newly unhealthy, and never
    //during a quick check
    final boolean newlyUnhealthy = aliveBefore && !aliveNow;
    if (newlyUnhealthy && !quickCheck) {
        this.doReplicationShutdownCleanup();
    }
    return isReplicationPartnerOperational();
}
/**
* Perform the replication related cleanup
* i.e. closing out pipes.
* Currently a placeholder: the body is empty and nothing is closed.
*/
private void doReplicationShutdownCleanup() {
//FIXME this should close the pipes
}
/**
* Return this instance's name as reported by ReplicationUtil.
*/
String getInstanceName() {
return ReplicationUtil.getInstanceName();
}
/**
 * Static convenience that forwards to
 * getReshapeReplicateToInstanceName(String, long) on the singleton.
 * @param formerPartnerInstance instance to exclude, may be null
 * @param sleepTime millis to wait before computing the partner
 */
public static String staticGetReshapeReplicateToInstanceName(String formerPartnerInstance, long sleepTime) {
    final ReplicationHealthChecker checker = _soleInstance;
    return checker.getReshapeReplicateToInstanceName(formerPartnerInstance, sleepTime);
}
/**
 * Compute the instance this instance should replicate to, waiting 3
 * seconds before consulting the membership view.
 * @param formerPartnerInstance instance to exclude from consideration, may be null
 */
public String getReshapeReplicateToInstanceName(String formerPartnerInstance) {
    return getReshapeReplicateToInstanceName(formerPartnerInstance, 3000L);
}

/**
 * Compute the instance this instance should replicate to.
 * @param formerPartnerInstance instance to exclude from consideration, may be null
 * @param sleepTime millis to wait before consulting the membership view
 */
public String getReshapeReplicateToInstanceName(String formerPartnerInstance, long sleepTime) {
    return getReshapeReplicateToInstanceName(formerPartnerInstance, this.getInstanceName(), sleepTime);
}

/**
 * Compute the replica peer of <code>sourceInstanceName</code> from the
 * conservative member list (with <code>formerPartnerInstance</code>
 * removed), as arranged by a SimpleInstanceArranger.
 * @param formerPartnerInstance instance to exclude from consideration, may be null
 * @param sourceInstanceName the instance whose replica peer is wanted
 * @param sleepTime millis to wait before consulting the membership view;
 * zero or negative means do not wait
 * @return the peer name reported by the arranger
 */
public String getReshapeReplicateToInstanceName(String formerPartnerInstance, String sourceInstanceName, long sleepTime) {
    SimpleInstanceArranger arranger = new SimpleInstanceArranger();
    // Thread.sleep rejects negative values with IllegalArgumentException,
    // so only sleep for a positive wait
    if (sleepTime > 0L) {
        try {
            Thread.sleep(sleepTime);
        } catch (InterruptedException ex) {
            //deliberate no-op (kept from the original): proceed with the
            //lookup immediately if the wait is cut short
        }
    }
    List instanceNames = getConservativeMemberList(formerPartnerInstance);
    arranger.init(instanceNames);
    String result = arranger.getReplicaPeerName(sourceInstanceName);
    if (_logger.isLoggable(Level.FINE)) {
        _logger.fine("getReplicaPeerName = " + result);
    }
    return result;
}
/**
 * Compute the instance that replicates to this instance, with no failed
 * instance to account for.
 */
public String getReshapeReplicateFromInstanceName() {
    return getReshapeReplicateFromInstanceName(null);
}

/**
 * Compute the instance that replicates to this instance.
 * @param failedInstance a failed instance that must still be considered;
 * it is added back to the member list if missing, because GMS may already
 * have removed it from the membership view. May be null.
 * @return the replicated-from peer name reported by the arranger
 */
public String getReshapeReplicateFromInstanceName(String failedInstance) {
    final String self = this.getInstanceName();
    final SimpleInstanceArranger arranger = new SimpleInstanceArranger();
    final List members = getConservativeMemberList(null);
    final boolean mustReAdd = (failedInstance != null) && !members.contains(failedInstance);
    if (mustReAdd) {
        members.add(failedInstance);
    }
    arranger.init(members);
    return arranger.getReplicatedFromPeerName(self);
}
/**
 * Return the partner instance name known to the sender pipe manager.
 */
public String getCurrentPartnerInstanceName() {
    return JxtaSenderPipeManager.createInstance().getPartnerInstanceName();
}
/**
 * Is <code>instanceName</code> (compared ignoring case) the current
 * partner reported by getCurrentPartnerInstanceName().
 */
boolean isCurrentReplicateToPartner(String instanceName) {
    final String partner = getCurrentPartnerInstanceName();
    if (partner == null) {
        return false;
    }
    return partner.equalsIgnoreCase(instanceName);
}
/**
 * Does the receiver pipe manager hold a health pipe wrapper for
 * <code>instanceName</code>.
 */
boolean isCurrentReplicateFromPartner(String instanceName) {
    final JxtaReceiverPipeManager receiverPipes = JxtaReceiverPipeManager.createInstance();
    return receiverPipes.getHealthPipeWrapper(instanceName) != null;
}
/**
 * Is <code>instanceName</code> (compared ignoring case) the instance
 * computed by getReshapeReplicateFromInstanceName().
 */
boolean isReplicateFromPartner(String instanceName) {
    final String expectedSource = getReshapeReplicateFromInstanceName();
    if (expectedSource == null) {
        return false;
    }
    return expectedSource.equalsIgnoreCase(instanceName);
}
/**
 * Is <code>instanceName</code> (compared ignoring case) the instance
 * computed by getReshapeReplicateToInstanceName(null).
 */
boolean isReplicateToPartner(String instanceName) {
    final String expectedTarget = getReshapeReplicateToInstanceName(null);
    if (expectedTarget == null) {
        return false;
    }
    return expectedTarget.equalsIgnoreCase(instanceName);
}
/**
 * Log, at FINE, each member of the current GMS core membership.
 * Does nothing (and does not query GMS) when FINE is not loggable.
 */
public void displayCurrentGroupMembers() {
    if (!_logger.isLoggable(Level.FINE)) {
        return;
    }
    int index = 0;
    for (Object member : this.getCurrentGroupMembersViaGMS()) {
        _logger.fine("member[" + index + "]=" + member);
        index++;
    }
}
/**
 * Return the members of the conservative member list for which
 * isServerLbEnabled() is true, in list order.
 */
public List getLbEnabledList() {
    final List candidates = getConservativeMemberList(null);
    final List lbEnabled = new ArrayList();
    for (Iterator it = candidates.iterator(); it.hasNext();) {
        final String name = (String) it.next();
        if (isServerLbEnabled(name)) {
            lbEnabled.add(name);
        }
    }
    return lbEnabled;
}
/**
 * Return the smaller of the two membership views (GMS vs. admin plus
 * extras), after removing <code>formerPartnerInstance</code> from both.
 * On a tie the GMS view is returned.
 * @param formerPartnerInstance instance to exclude, may be null
 */
public List getConservativeMemberList(String formerPartnerInstance) {
    final List viaAdmin = getCurrentGroupMembersViaAdminAndExtras();
    final List viaGms = getCurrentGroupMembersViaGMS();
    if (formerPartnerInstance != null) {
        viaAdmin.remove(formerPartnerInstance);
        viaGms.remove(formerPartnerInstance);
    }
    final boolean gmsIsSmallerOrEqual = viaGms.size() <= viaAdmin.size();
    return gmsIsSmallerOrEqual ? viaGms : viaAdmin;
}
/**
 * Log every element of the list at INFO, one line per element with its index.
 * @param stringList the list to display
 */
public static void displayStringList(List<String> stringList) {
    final int count = stringList.size();
    int position = 0;
    while (position < count && position < stringList.size()) {
        _logger.log(Level.INFO, "displayStringList:elem[" + position + "] = " + stringList.get(position));
        position++;
    }
}
/**
 * Return the admin view of the cluster members plus any "extra" instance
 * names not already present (compared ignoring case).
 */
public List getCurrentGroupMembersViaAdminAndExtras() {
    final ArrayList names = (ArrayList) getCurrentGroupMembersViaAdmin();
    for (Object extra : (Collection) _extraInstanceNames) {
        final String extraName = (String) extra;
        final boolean alreadyListed = isStringContainedInList(extraName, names);
        if (!alreadyListed) {
            names.add(extraName);
        }
    }
    if (_logger.isLoggable(Level.FINEST)) {
        _logger.finest("getCurrentGroupMemebersVisAdminAndExtras");
        displayStringList(names);
    }
    return names;
}
/**
 * Case-insensitive membership test.
 * @param aString the value to look for
 * @param list list of String elements
 * @return true if some element equals <code>aString</code> ignoring case
 */
private boolean isStringContainedInList(String aString, ArrayList list) {
    for (Object element : list) {
        if (((String) element).equalsIgnoreCase(aString)) {
            return true;
        }
    }
    return false;
}
/**
 * Return the server names in the cluster as known to the server
 * configuration (the "admin" view).
 */
public List getCurrentGroupMembersViaAdmin() {
    final ArrayList namesFromConfig = new ServerConfigLookup().getServerNamesInCluster();
    return namesFromConfig;
}
/**
 * Return the GMS view of the cluster members for load purposes.
 * <p>
 * If no join has been recorded yet, or the most recent join was less than
 * 120 seconds ago, only members that are READY or ALIVEANDREADY are
 * returned; otherwise all current core members are returned.
 * @return the member names; an empty list when GMS is unavailable or a
 * GMS exception occurs (the exception is logged at FINE)
 */
public List<String> getCurrentGroupMembersViaGMSForLoad() {
    List<String> coreMembers = new ArrayList<String>();
    try {
        if (_gms == null) {
            _gms = GMSFactory.getGMSModule(getClusterName());
            if (_gms == null) {
                return coreMembers;
            }
        }
        GroupHandle groupHandle = _gms.getGroupHandle();
        // read the join time once so both tests see the same value
        final long lastJoinTime = getMostRecentJoinTime();
        final boolean noJoinOrRecentJoin = lastJoinTime == -1L
            || (System.currentTimeMillis() - lastJoinTime) < (120 * 1000);
        if (noJoinOrRecentJoin) {
            coreMembers = getCurrentAliveAndReadyMembers(groupHandle);
        } else {
            coreMembers = groupHandle.getCurrentCoreMembers();
        }
    }
    catch (GMSNotInitializedException ex1) {
        _logger.log(Level.FINE, "GMS not initialized; returning empty member list", ex1);
    }
    catch (GMSNotEnabledException ex2) {
        _logger.log(Level.FINE, "GMS not enabled; returning empty member list", ex2);
    }
    catch (GMSException ex3) {
        _logger.log(Level.FINE, "GMS error; returning empty member list", ex3);
    }
    return coreMembers;
}
/**
 * Return the number of members in the GMS view of the cluster.
 */
public int getCurrentGroupMembersSizeViaGMS() {
    final List<String> members = getCurrentGroupMembersViaGMS();
    return members.size();
}
/**
 * Return the current core members of the cluster as reported by GMS.
 * @return the member names; an empty list when GMS is unavailable or a
 * GMS exception occurs (the exception is logged at FINE)
 */
public List<String> getCurrentGroupMembersViaGMS() {
    List<String> coreMembers = new ArrayList<String>();
    try {
        if (_gms == null) {
            // the lookup in the static initializer failed or returned null; retry
            _gms = GMSFactory.getGMSModule(getClusterName());
            if (_gms == null) {
                return coreMembers;
            }
        }
        GroupHandle groupHandle = _gms.getGroupHandle();
        coreMembers = groupHandle.getCurrentCoreMembers();
    }
    catch (GMSNotInitializedException ex1) {
        _logger.log(Level.FINE, "GMS not initialized; returning empty member list", ex1);
    }
    catch (GMSNotEnabledException ex2) {
        _logger.log(Level.FINE, "GMS not enabled; returning empty member list", ex2);
    }
    catch (GMSException ex3) {
        _logger.log(Level.FINE, "GMS error; returning empty member list", ex3);
    }
    return coreMembers;
}
/**
 * Return the current core members whose state is READY or ALIVEANDREADY.
 * @param gh the group handle to query
 */
public List<String> getCurrentAliveAndReadyMembers(GroupHandle gh) {
    // okay if heartbeat info is up to 10 seconds old
    final long heartbeatThresholdMillis = 10000;
    // per the original author's note: a zero timeout means no network
    // lookup of the member's state; UNKNOWN is returned when there is no
    // sufficiently fresh local information
    final long timeoutMillis = 0;

    final List<String> coreMembers = gh.getCurrentCoreMembers();
    final List<String> aliveOrReady = new ArrayList<String>();
    for (String member : coreMembers) {
        final MemberStates state = gh.getMemberState(member, heartbeatThresholdMillis, timeoutMillis);
        final boolean usable = (state == MemberStates.ALIVEANDREADY)
            || (state == MemberStates.READY);
        if (usable) {
            aliveOrReady.add(member);
        }
    }
    return aliveOrReady;
}
/**
 * Is load balancing enabled for the named instance according to its
 * server-ref in the cluster configuration.
 * @param instanceName the instance to look up
 * @return false when there is no cluster or no server-ref for the name
 */
public static boolean isServerLbEnabled(String instanceName) {
    final Cluster cluster = new ServerConfigLookup().getCluster();
    final ServerRef serverRef = (cluster == null) ? null : cluster.getServerRefByRef(instanceName);
    return (serverRef != null) && serverRef.isLbEnabled();
}
/**
* Ask the group handle for the state of a member; the arguments are
* passed through unchanged.
* @param instanceName the member to query
* @param gh the group handle to ask
* @param heartbeatThreshold forwarded to GroupHandle.getMemberState
* @param memberResponseTimeout forwarded to GroupHandle.getMemberState
*/
public MemberStates getMemberState(String instanceName, GroupHandle gh, long heartbeatThreshold, long memberResponseTimeout) {
return gh.getMemberState(instanceName, heartbeatThreshold, memberResponseTimeout);
}
/**
 * Is the named instance considered alive, i.e. does
 * determineMemberStateAfterPipeFailure() map it to INSTANCE_STARTING or
 * INSTANCE_OK.
 */
public boolean isReplicationSourceInstanceAlive(String instanceName) {
    switch (determineMemberStateAfterPipeFailure(instanceName)) {
        case INSTANCE_STARTING:
        case INSTANCE_OK:
            return true;
        default:
            return false;
    }
}
/**
 * Determine the state of a member after a pipe failure, using a zero
 * heartbeat threshold and a 10 second member response timeout.
 * @param instanceName the member to query
 * @return one of the INSTANCE_* codes
 */
public int determineMemberStateAfterPipeFailure(String instanceName) {
    final long heartbeatThreshold = 0;
    final long responseTimeoutMillis = 10 * 1000; // 10 second wait
    return determineMemberStateAfterPipeFailure(instanceName, heartbeatThreshold, responseTimeoutMillis);
}
/**
 * Retry variant: determine the state of a member using a 4 second
 * heartbeat threshold and a 1 second member response timeout.
 * @param instanceName the member to query
 * @return one of the INSTANCE_* codes
 */
public int determineMemberStateAfterPipeFailureRetry(String instanceName) {
    final long heartbeatThresholdMillis = 4 * 1000;
    final long responseTimeoutMillis = 1 * 1000; // 1 second wait
    return determineMemberStateAfterPipeFailure(instanceName, heartbeatThresholdMillis, responseTimeoutMillis);
}
/**
 * Map the GMS state of a member onto one of the INSTANCE_* codes.
 * <ul>
 * <li>STARTING, ALIVE: INSTANCE_STARTING</li>
 * <li>READY, ALIVEANDREADY: INSTANCE_OK</li>
 * <li>INDOUBT, DEAD, STOPPED, CLUSTERSTOPPING, PEERSTOPPING and any
 * unlisted state: INSTANCE_FAILED</li>
 * <li>UNKNOWN, or no state returned at all: INSTANCE_UNKNOWN</li>
 * </ul>
 * NOTE(review): when GMS is unavailable or throws, the result stays at
 * INSTANCE_STARTING (the original default of 0), which callers treat as
 * "alive" - confirm this is intended.
 * @param instanceName the member to query
 * @param heartbeatThreshold forwarded to the GMS member state query
 * @param memberResponseTimeout forwarded to the GMS member state query
 * @return one of the INSTANCE_* codes
 */
public int determineMemberStateAfterPipeFailure(String instanceName, long heartbeatThreshold, long memberResponseTimeout) {
    int result = INSTANCE_STARTING;
    MemberStates state = null;
    try {
        if (_gms == null) {
            _gms = GMSFactory.getGMSModule(getClusterName());
            if (_gms == null) {
                return INSTANCE_STARTING;
            }
        }
        GroupHandle groupHandle = _gms.getGroupHandle();
        state = getMemberState(instanceName, groupHandle, heartbeatThreshold, memberResponseTimeout);
        if (state == null) {
            // switching on a null enum would throw NullPointerException
            result = INSTANCE_UNKNOWN;
        } else {
            switch (state) {
                case STARTING:
                case ALIVE:
                    //instance is starting so just wait for join
                    result = INSTANCE_STARTING;
                    break;
                case READY:
                case ALIVEANDREADY:
                    // reconnect to original replica partner
                    result = INSTANCE_OK;
                    break;
                case INDOUBT:
                case DEAD:
                case STOPPED:
                case CLUSTERSTOPPING:
                case PEERSTOPPING:
                    //instance is down so treat as failure
                    result = INSTANCE_FAILED;
                    break;
                case UNKNOWN:
                    //state could not be determined
                    result = INSTANCE_UNKNOWN;
                    break;
                default:
                    //treat default as a failure too
                    result = INSTANCE_FAILED;
                    break;
            }
        }
    }
    catch (GMSNotInitializedException ex1) {
        _pipelogger.log(Level.FINE, "GMS not initialized while determining state of " + instanceName, ex1);
    }
    catch (GMSNotEnabledException ex2) {
        _pipelogger.log(Level.FINE, "GMS not enabled while determining state of " + instanceName, ex2);
    }
    catch (GMSException ex3) {
        _pipelogger.log(Level.FINE, "GMS error while determining state of " + instanceName, ex3);
    }
    if (_pipelogger.isLoggable(Level.FINE)) {
        _pipelogger.fine("determineMemberStateAfterPipeFailure() MAPPING: " + state + " to " + result);
    }
    return result;
}
/**
 * Return the cluster name from the server configuration.
 */
private String getClusterName() {
    return new ServerConfigLookup().getClusterName();
}
/**
 * Do the health-check call to determine if partner is ok.
 * Currently no active check is performed: the method only emits a FINEST
 * trace and always answers true.
 */
public boolean isReplicationPartnerOk() {
    //FIXME work out how to actively check health GMS
    if (_logger.isLoggable(Level.FINEST)) {
        _logger.finest("isReplicationPartnerOk() begin:runtimeHealthCheckExceptionReported: " + runtimeHealthCheckExceptionReported);
    }
    return true;
}
/**
 * Read the health-check interval (seconds) from configuration. For now
 * the HA store health-check interval setting is used.
 */
protected int getReplicationHealthcheckIntervalInSecondsFromConfig() {
    //FIXME add config.getReplicationHealthcheckIntervalInSecondsFromConfig
    return new ServerConfigLookup().getHaStoreHealthcheckIntervalInSecondsFromConfig();
}
/**
 * Return the health-check interval in seconds. A positive cached value
 * is returned as is; otherwise the value is (re)read from configuration,
 * cached and returned.
 */
protected int getReplicationHealthcheckIntervalInSeconds() {
    if (replicationHealthcheckIntervalInSeconds <= 0) {
        replicationHealthcheckIntervalInSeconds =
            this.getReplicationHealthcheckIntervalInSecondsFromConfig();
    }
    return replicationHealthcheckIntervalInSeconds;
}
/**
 * Has start() completed without a subsequent stop().
 */
boolean isStarted() {
    return this.started;
}
/**
* Is the instance flagged as stopping (see setStopping).
*/
public static boolean isStopping() {
return stoppingFlag.get();
}
/**
 * Set the flag indicating the instance is stopping.
 * <p>
 * When stopping begins, replication communication is silently marked as
 * NOT operational (silent = no health-change report, since this is a
 * deliberate shutdown rather than a real failure). The original passed
 * <code>value</code> (i.e. true) here, which marked communication as
 * operational at the moment of shutdown.
 * @param value true when the instance is stopping
 */
static void setStopping(boolean value) {
    if (value) {
        setReplicationCommunicationOperational(false, false);
    }
    stoppingFlag.set(value);
}
/**
* Is the instance flagged as flushing.
*/
public static boolean isFlushing() {
return flushingFlag.get();
}
/**
* Set the flag indicating the instance is flushing.
*/
static void setFlushing(boolean value) {
flushingFlag.set(value);
}
/**
* Is a thread flagged as waiting for the flush.
*/
public static boolean isFlushThreadWaiting() {
return flushThreadWaitingFlag.get();
}
/**
* Atomically add one to the count of sender dispatch threads.
*/
public static void incrementDispatchThreadCount() {
dispatchThreadCount.incrementAndGet();
}
/**
* Return the current count of sender dispatch threads.
*/
public static int getDispatchThreadCount() {
return dispatchThreadCount.get();
}
/**
* Set the flag indicating a thread is waiting for the flush.
*/
public static void setFlushThreadWaiting(boolean value) {
flushThreadWaitingFlag.set(value);
}
/**
 * Return the shutdown latch, creating it on first use with a count equal
 * to the number of dispatch threads registered at that moment.
 */
public synchronized static CountDownLatch getDoneSignal() {
    if (doneSignal != null) {
        return doneSignal;
    }
    doneSignal = new CountDownLatch(getDispatchThreadCount());
    return doneSignal;
}
/**
* Return the monitor object used for unload thread synchronization.
*/
public static Object getUnloadMonitor() {
return _unloadMonitor;
}
/**
* Return the time (millis) of the most recent join, or -1 if none was recorded.
*/
public static long getMostRecentJoinTime() {
return _mostRecentJoinTime.get();
}
/**
* Record the time (millis) of the most recent join.
*/
static void setMostRecentJoinTime(long joinTime) {
_mostRecentJoinTime.set(joinTime);
}
/**
* Return the time (millis) of the most recent reshape, or -1 if none was recorded.
*/
public static long getMostRecentReshapeTime() {
return _mostRecentReshapeTime.get();
}
/**
* Record the time (millis) of the most recent reshape.
* Note: the parameter is named joinTime but holds the reshape time.
*/
static void setMostRecentReshapeTime(long joinTime) {
_mostRecentReshapeTime.set(joinTime);
}
/**
 * Did the most recent reshape happen less than
 * <code>timeWindowMillis</code> ago.
 * @return false when no reshape has been recorded
 */
public static boolean isLastReshapeWithin(long timeWindowMillis) {
    final long lastReshape = getMostRecentReshapeTime();
    if (lastReshape == -1L) {
        return false;
    }
    final long elapsed = System.currentTimeMillis() - lastReshape;
    return elapsed < timeWindowMillis;
}
/**
 * Poll, with exponentially growing pauses starting at 100ms, until a
 * reshape is seen within the window or the window has elapsed.
 * <p>
 * The wait is bounded by elapsed time. The original compared the next
 * pause length against the window, so a 120 second request could block
 * for roughly 205 seconds.
 * @param durationInSeconds the window, and the maximum time to wait, in seconds
 * @return true if the most recent reshape is within the window; false if
 * the window elapsed first. If the calling thread is interrupted, its
 * interrupt status is restored and the current answer is returned.
 */
public static boolean checkForReshapeMessageArrivalWithin(int durationInSeconds) {
    final long windowMillis = durationInSeconds * 1000L;
    final long deadline = System.currentTimeMillis() + windowMillis;
    long sleepInterval = 100L;
    while (true) {
        if (isLastReshapeWithin(windowMillis)) {
            return true;
        }
        final long remaining = deadline - System.currentTimeMillis();
        if (remaining <= 0L) {
            return false;
        }
        try {
            // never sleep past the deadline
            Thread.sleep(Math.min(sleepInterval, remaining));
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return isLastReshapeWithin(windowMillis);
        }
        sleepInterval *= 2;
    }
}
/**
 * Has pipe initialization been called on the registered receiver.
 * @return false when no receiver has been registered
 */
public boolean isPipeInitializationCalled() {
    return (_replicationReceiver != null)
        && ((JxtaReplicationReceiver) _replicationReceiver).isPipeInitializationCalled();
}
/**
 * Is the cluster (GMS group) being shut down.
 * @return false when GMS is not available
 */
public static boolean isClusterStopping() {
    return (_gms != null)
        && _gms.isGroupBeingShutdown(ReplicationUtil.getClusterName());
}
/**
* Return the background health-check thread; null when none is running.
*/
Thread getThread() {
return thread;
}
/**
 * Sleep for the health-check interval.
 * <p>
 * The configured interval replaces <code>_sleepIntervalSeconds</code> only
 * when it is positive; otherwise the previous value (initially 5 seconds)
 * is kept. A negative value would make Thread.sleep throw
 * IllegalArgumentException and end the health-check thread, and zero
 * would turn the run loop into a busy loop.
 */
protected void threadSleep() {
    final int configuredSeconds = this.getReplicationHealthcheckIntervalInSeconds();
    if (configuredSeconds > 0) {
        _sleepIntervalSeconds = configuredSeconds;
    }
    try {
        Thread.sleep(_sleepIntervalSeconds * 1000L);
    } catch (InterruptedException e) {
        // threadStop() interrupts this thread to cut the sleep short;
        // the caller's loop re-checks threadDone
    }
}
/**
 * The background thread that checks for replication partner health.
 * Sleeps for the health-check interval, then performs a check, until the
 * termination flag is set. The flag is re-checked after each sleep so no
 * further check runs once a stop has been requested.
 */
public void run() {
    // Loop until the termination semaphore is set
    while (!threadDone) {
        threadSleep();
        if (threadDone) {
            break;
        }
        doReplicationHealthCheck();
    }
}
/**
 * Load the health-check-enabled setting from configuration into the
 * singleton's wrapper. This is done at start time.
 */
private static void initializeHealthCheckEnabledFlag() {
    final ServerConfigLookup config = new ServerConfigLookup();
    final BooleanWrapper enabledWrapper = getInstance().getHealthCheckEnabledWrapper();
    if (enabledWrapper == null) {
        return;
    }
    enabledWrapper.setValue(config.getHadbHealthCheckFromConfig());
}
/**
* Return the wrapper holding the health-check-enabled setting.
*/
BooleanWrapper getHealthCheckEnabledWrapper() {
return _healthCheckEnabledFlag;
}
/**
* Is health checking enabled. Currently always true: the lookup of the
* configured value (kept below, commented out) is disabled.
*/
private static boolean isHealthCheckingEnabled() {
/* FIXME for now just return true
BooleanWrapper healthCheckEnabledWrapper
= getInstance().getHealthCheckEnabledWrapper();
if(_logger.isLoggable(Level.FINEST)) {
_logger.finest("isHealthCheckingEnabled() reporting: "
+ getInstance().getHealthCheckEnabledWrapper().getValue());
}
return healthCheckEnabledWrapper.getValue();
*/
return true;
}
/**
 * Prepare for the beginning of active use of the public methods of this
 * component: loads the health-check-enabled setting and, unless already
 * started, launches the background health-check thread.
 * Calling this on an already started component only reloads the setting.
 *
 * @exception LifecycleException declared for the lifecycle contract; not
 *  thrown by the code in this method itself
 */
public void start() throws LifecycleException {
    //initialize the wrapper
    initializeHealthCheckEnabledFlag();

    // FIXME (carried over): starting regardless of whether health
    // checking is enabled; that decision may be reconsidered
    if (!started) {
        if (_logger.isLoggable(Level.FINEST)) {
            _logger.finest("Replication health checking enabled");
        }
        // FIXME (carried over): admin event registration to be done later

        // Start the background health-check thread
        threadStart();
        started = true;
    }
}
/**
 * Gracefully terminate the active use of the public methods of this
 * component by stopping the background health-check thread.
 * Does nothing if the component has not been started.
 *
 * @exception LifecycleException declared for the lifecycle contract; not
 *  thrown by the code in this method itself
 */
public void stop() throws LifecycleException {
    if (started) {
        // FIXME (carried over): admin event unregistration to be added later

        // Stop the background health-check thread
        threadStop();
        started = false;
    }
}
/**
 * Start the background thread that will periodically check
 * the health of replication. No-op if a thread already exists.
 * The thread is a daemon named by getThreadName().
 */
protected void threadStart() {
    if (thread == null) {
        threadDone = false;
        thread = new Thread(this, getThreadName());
        thread.setDaemon(true);
        thread.start();
    }
}
/**
 * Stop the background thread that is periodically checking the health
 * of replication: sets the termination flag, interrupts the thread to
 * cut its sleep short, and waits for it to finish.
 * No-op if no thread exists. If the calling thread is interrupted while
 * waiting, its interrupt status is restored.
 */
protected void threadStop() {
    if (thread == null) {
        return;
    }
    threadDone = true;
    thread.interrupt();
    try {
        thread.join();
    } catch (InterruptedException e) {
        // preserve the interrupt for the caller instead of swallowing it
        Thread.currentThread().interrupt();
    }
    thread = null;
}
/**
 * A mutable, tri-state boolean holder: unset (reads as false), true or
 * false. All accessors synchronize on the wrapper.
 */
class BooleanWrapper {
    /** The wrapped value; null until a value has been set. */
    Boolean _value = null;

    /** Creates a new, uninitialized instance of BooleanWrapper */
    public BooleanWrapper() {
    }

    /** Creates a new instance of BooleanWrapper holding <code>value</code> */
    public BooleanWrapper(boolean value) {
        // Boolean.valueOf reuses the cached instances; new Boolean(...) is deprecated
        _value = Boolean.valueOf(value);
    }

    /**
     * @return the wrapped value, or false if none has been set
     */
    synchronized boolean getValue() {
        //default value false
        return isInitialized() && _value.booleanValue();
    }

    /**
     * Set the wrapped value; the wrapper is initialized afterwards.
     */
    synchronized void setValue(boolean value) {
        _value = Boolean.valueOf(value);
    }

    /**
     * @return true once a value has been set; synchronized like the other
     * accessors so a value set by another thread is visible here
     */
    synchronized boolean isInitialized() {
        return _value != null;
    }
}
}