/*
* JBoss, Home of Professional Open Source
*
* Distributable under LGPL license.
* See terms of license at gnu.org.
*/
package org.jboss.cache.buddyreplication;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jboss.cache.CacheException;
import org.jboss.cache.CacheSPI;
import org.jboss.cache.Fqn;
import org.jboss.cache.Node;
import org.jboss.cache.RPCManager;
import org.jboss.cache.Region;
import org.jboss.cache.RegionManager;
import org.jboss.cache.config.BuddyReplicationConfig;
import org.jboss.cache.config.BuddyReplicationConfig.BuddyLocatorConfig;
import org.jboss.cache.config.Configuration;
import org.jboss.cache.config.Option;
import org.jboss.cache.factories.annotations.Inject;
import org.jboss.cache.factories.annotations.Stop;
import org.jboss.cache.lock.TimeoutException;
import org.jboss.cache.marshall.MethodCall;
import org.jboss.cache.marshall.MethodCallFactory;
import org.jboss.cache.marshall.MethodDeclarations;
import org.jboss.cache.notifications.Notifier;
import org.jboss.cache.notifications.annotation.CacheListener;
import org.jboss.cache.notifications.annotation.ViewChanged;
import org.jboss.cache.notifications.event.ViewChangedEvent;
import org.jboss.cache.statetransfer.StateTransferManager;
import org.jboss.cache.util.ExposedByteArrayOutputStream;
import org.jboss.cache.util.concurrent.ConcurrentHashSet;
import org.jboss.cache.util.reflect.ReflectionUtil;
import org.jboss.util.stream.MarshalledValueInputStream;
import org.jboss.util.stream.MarshalledValueOutputStream;
import org.jgroups.Address;
import org.jgroups.Channel;
import org.jgroups.View;
import org.jgroups.util.Util;
import java.io.ByteArrayInputStream;
import java.util.*;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
/**
* Class that manages buddy replication groups.
*
* @author <a href="mailto:manik@jboss.org">Manik Surtani (manik@jboss.org)</a>
*/
public class BuddyManager
{
private static Log log = LogFactory.getLog(BuddyManager.class);
/**
* Configuration object.
*/
BuddyReplicationConfig config;
/**
* Buddy locator class
*/
BuddyLocator buddyLocator;
/**
* back-reference to the CacheImpl object
*/
private CacheSPI<?, ?> cache;
private Configuration configuration;
private RegionManager regionManager;
private Notifier notifier;
private StateTransferManager stateTransferManager;
private RPCManager rpcManager;
/**
* The buddy group set up for this instance
*/
BuddyGroup buddyGroup;
/**
* Map of buddy pools received from broadcasts
*/
final Map<Address, String> buddyPool = new ConcurrentHashMap<Address, String>();
/**
* The nullBuddyPool is a set of addresses that have not specified buddy pools.
*/
final Set<Address> nullBuddyPool = new ConcurrentHashSet<Address>();
/**
* Map of buddy groups the current instance participates in as a backup node.
* Keyed on String group name, values are BuddyGroup objects.
* Needs to deal with concurrent access - concurrent assignTo/removeFrom buddy grp
*/
Map<String, BuddyGroup> buddyGroupsIParticipateIn = new ConcurrentHashMap<String, BuddyGroup>();
/**
* Queue to deal with queued up view change requests - which are handled asynchronously
*/
private final BlockingQueue<MembershipChange> queue = new LinkedBlockingQueue<MembershipChange>();
/**
* Async thread that handles items on the view change queue
*/
private AsyncViewChangeHandlerThread asyncViewChangeHandler = new AsyncViewChangeHandlerThread();
/**
* Constants representing the buddy backup subtree
*/
public static final String BUDDY_BACKUP_SUBTREE = "_BUDDY_BACKUP_";
public static final Fqn<String> BUDDY_BACKUP_SUBTREE_FQN = Fqn.fromString(BUDDY_BACKUP_SUBTREE);
/**
* number of times to retry communicating with a selected buddy if the buddy has not been initialised.
*/
private final static int UNINIT_BUDDIES_RETRIES = 5;
/**
* wait time between retries
*/
private static final long[] UNINIT_BUDDIES_RETRY_NAPTIME = {500, 1000, 1500, 2000, 2500};
/**
* Lock to synchronise on to ensure buddy pool info is received before buddies are assigned to groups.
*/
private final Object poolInfoNotifierLock = new Object();
private CountDownLatch initialisationLatch = new CountDownLatch(1);
// a dummy MembershipChange - a poison-pill to be placed on the membership change queue to notify async handler
// threads to exit gracefully when the BuddyManager has been stopped.
private static final MembershipChange STOP_NOTIFIER = new MembershipChange(null, null);
private ViewChangeListener viewChangeListener; // the view-change viewChangeListener
private boolean receivedBuddyInfo;
/**
* Creates a BuddyManager without any configuration. Neither {@link #config} nor {@link #buddyLocator}
* is assigned here; <code>injectDependencies</code> assigns them later through <code>setupInternals</code>.
*/
public BuddyManager()
{
}
/**
* Creates a BuddyManager for the given buddy replication configuration and immediately sets up the
* buddy locator it describes.
*
* @param config buddy replication configuration; it is dereferenced during setup, so it must not be null
*/
public BuddyManager(BuddyReplicationConfig config)
{
setupInternals(config);
}
/**
* Stores the given configuration and creates the {@link BuddyLocator} it asks for. If the configured
* locator cannot be instantiated, the failure is logged and the default locator is used instead.
* The configuration is finally updated so that it reflects the locator config that is really in effect.
*
* @param config the buddy replication configuration to adopt
*/
private void setupInternals(BuddyReplicationConfig config)
{
   this.config = config;
   BuddyLocatorConfig locatorConfig = config.getBuddyLocatorConfig();
   try
   {
      // a missing locator config is fine - it simply selects the default locator
      if (locatorConfig == null)
      {
         buddyLocator = createDefaultBuddyLocator();
      }
      else
      {
         buddyLocator = createBuddyLocator(locatorConfig);
      }
   }
   catch (Exception e)
   {
      log.warn("Caught exception instantiating buddy locator", e);
      log.error("Unable to instantiate specified buddyLocatorClass [" + locatorConfig + "]. Using default buddyLocator [" + NextMemberBuddyLocator.class.getName() + "] instead, with default properties.");
      buddyLocator = createDefaultBuddyLocator();
   }

   // the locator may report a different config object than the one we started from; publish that one
   if (buddyLocator.getConfig() != locatorConfig)
   {
      config.setBuddyLocatorConfig(buddyLocator.getConfig());
   }
}
/**
* Receives the collaborating cache components and then (re)initialises the buddy locator from the
* buddy replication section of the injected {@link Configuration}.
*/
@Inject
private void injectDependencies(CacheSPI cache, Configuration configuration, RegionManager regionManager, StateTransferManager stateTransferManager, RPCManager rpcManager, Notifier notifier)
{
   // plain field wiring - the order of these assignments is irrelevant
   this.notifier = notifier;
   this.rpcManager = rpcManager;
   this.stateTransferManager = stateTransferManager;
   this.regionManager = regionManager;
   this.configuration = configuration;
   this.cache = cache;

   BuddyReplicationConfig buddyConfig = configuration.getBuddyReplicationConfig();
   setupInternals(buddyConfig);
}
/**
* Returns the buddy replication configuration held by this manager.
*
* @return the value of {@link #config}
*/
public BuddyReplicationConfig getConfig()
{
return config;
}
/**
* Instantiates, via its no-argument constructor, the buddy locator class named in the given config
* and initialises it with that config.
*
* @param config locator configuration naming the implementation class
* @return the initialised locator
* @throws ClassNotFoundException if the named class cannot be loaded
* @throws IllegalAccessException if the class or its no-argument constructor is not accessible
* @throws InstantiationException if the class cannot be instantiated
*/
protected BuddyLocator createBuddyLocator(BuddyLocatorConfig config) throws ClassNotFoundException, IllegalAccessException, InstantiationException
{
   Class<?> locatorClass = Class.forName(config.getBuddyLocatorClass());
   BuddyLocator locator = (BuddyLocator) locatorClass.newInstance();
   locator.init(config);
   return locator;
}
/**
* Creates the fallback locator, a {@link NextMemberBuddyLocator} initialised with a null config.
*
* @return the initialised default locator
*/
protected BuddyLocator createDefaultBuddyLocator()
{
   BuddyLocator defaultLocator = new NextMemberBuddyLocator();
   // no explicit locator config is supplied for the default locator
   defaultLocator.init(null);
   return defaultLocator;
}
/**
* Tells whether buddy replication is enabled.
*
* @return the <code>enabled</code> flag of the buddy replication configuration
*/
public boolean isEnabled()
{
return config.isEnabled();
}
/**
* Returns the name of the buddy pool configured for this instance.
*
* @return the configured buddy pool name; other code in this class treats a null value as "no pool specified"
*/
public String getBuddyPoolName()
{
return config.getBuddyPoolName();
}
/**
* Derives a buddy group name from an address by taking its string form and replacing every
* colon with an underscore.
*
* @param address the address (or any object) whose <code>toString()</code> is used; must not be null
* @return the string form of <code>address</code> with ':' replaced by '_'
*/
public static String getGroupNameFromAddress(Object address)
{
   String rendered = address.toString();
   return rendered.replace(':', '_');
}
/**
* Stops the buddy manager and the related async thread.
* <p/>
* Does nothing when buddy replication is disabled. Otherwise the view change listener is unregistered
* (if one was registered), all pending view changes are discarded and the stop notifier is placed on
* the queue so that the async handler thread exits.
*/
@Stop
public void stop()
{
   if (!config.isEnabled()) return;

   log.debug("Stopping BuddyManager");
   // unregister the viewChangeListener; it only exists once init() has run
   if (cache != null && viewChangeListener != null) cache.removeCacheListener(viewChangeListener);

   // Same monitor as enqueueViewChange(): a view change that is being enqueued concurrently must not
   // clear the queue after the stop notifier has been added.
   synchronized (this)
   {
      queue.clear();
      // offer() rather than put(): put() is interruptible, so an interrupted stopping thread would
      // silently fail to deliver the notifier. The queue is unbounded, so offer() is expected to succeed.
      if (!queue.offer(STOP_NOTIFIER))
      {
         log.warn("Unable to enqueue the stop notifier; the async view change handler may not terminate");
      }
   }
}
// TODO: This needs to be started manually for now, rather than by @Start. See CacheImpl.internalStart()
/**
* Starts buddy replication for this instance.
* <p/>
* Creates this node's own {@link BuddyGroup}, announces the local buddy pool to the cluster, makes sure
* the buddy backup subtree exists, releases threads waiting on the initialisation latch, registers the
* view change listener, assigns buddies from the current membership and finally starts the async view
* change handler.
* <p/>
* If the RPCManager reports no local address and the cache mode is LOCAL, buddy replication is switched
* off in the configuration and the method returns without doing any of the above.
*
* @throws CacheException if the RPCManager reports no local address and the cache mode is not LOCAL
*/
public void init() throws CacheException
{
log.debug("Starting BuddyManager");
buddyGroup = new BuddyGroup();
buddyGroup.setDataOwner(cache.getLocalAddress());
Address localAddress = rpcManager.getLocalAddress();
if (localAddress == null)
{
if (configuration.getCacheMode() == Configuration.CacheMode.LOCAL)
{
log.warn("Buddy replication is enabled but cache mode is LOCAL - not starting BuddyManager!");
// NOTE(review): presumably unlocks the config object so that setEnabled() is permitted - confirm against ReflectionUtil
ReflectionUtil.setValue(config, "accessible", true);
config.setEnabled(false);
return;
}
else
{
throw new CacheException("Unable to initialize BuddyManager - the RPCManager has not connected to the cluster and local Address is null!");
}
}
buddyGroup.setGroupName(getGroupNameFromAddress(localAddress));
// remember our own pool membership; instances without a pool name are not recorded in buddyPool
if (config.getBuddyPoolName() != null)
{
buddyPool.put(buddyGroup.getDataOwner(), config.getBuddyPoolName());
}
// passes null recipients to the remote call
broadcastBuddyPoolMembership();
// make sure the root of the backup subtree exists
if (!cache.exists(BUDDY_BACKUP_SUBTREE_FQN)) cache.getRoot().addChildDirect(BUDDY_BACKUP_SUBTREE_FQN);
// allow waiting threads to process.
initialisationLatch.countDown();
// register a CacheImpl Listener to reassign buddies as and when view changes occur
viewChangeListener = new ViewChangeListener();
cache.addCacheListener(viewChangeListener);
// assign buddies based on what we know now
reassignBuddies(cache.getMembers());
// drop anything still queued (including a stop notifier left by an earlier stop()) before the handler starts
queue.clear();
asyncViewChangeHandler.start();
}
/**
* @return the <code>autoDataGravitation</code> setting of the buddy replication configuration
*/
public boolean isAutoDataGravitation()
{
return config.isAutoDataGravitation();
}
/**
* @return the <code>dataGravitationRemoveOnFind</code> setting of the buddy replication configuration
*/
public boolean isDataGravitationRemoveOnFind()
{
return config.isDataGravitationRemoveOnFind();
}
/**
* @return the <code>dataGravitationSearchBackupTrees</code> setting of the buddy replication configuration
*/
public boolean isDataGravitationSearchBackupTrees()
{
return config.isDataGravitationSearchBackupTrees();
}
/**
* @return the buddy communication timeout from the buddy replication configuration; this class passes
*         the same value as the timeout of its remote calls to buddies
*/
public int getBuddyCommunicationTimeout()
{
return config.getBuddyCommunicationTimeout();
}
// -------------- methods to be called by the tree cache viewChangeListener --------------------
/**
* Simple holder for a membership transition: the members before and after a view change.
* Either list may be null; the stop notifier, for instance, carries two nulls.
*/
static class MembershipChange
{
   List<Address> oldMembers;
   List<Address> newMembers;

   public MembershipChange(List<Address> oldMembers, List<Address> newMembers)
   {
      this.newMembers = newMembers;
      this.oldMembers = oldMembers;
   }

   @Override
   public String toString()
   {
      StringBuilder text = new StringBuilder("MembershipChange: Old members = ");
      text.append(oldMembers);
      text.append(" New members = ");
      text.append(newMembers);
      return text.toString();
   }
}
/**
* Queues a membership change for asynchronous processing, replacing anything that is still waiting.
* Nothing is queued once the stop notifier sits at the head of the queue.
*
* @param oldMembers members before the change; may be null
* @param newMembers members after the change
*/
private synchronized void enqueueViewChange(List<Address> oldMembers, List<Address> newMembers)
{
   // the manager is shutting down - leave the stop notifier alone
   if (queue.peek() == STOP_NOTIFIER) return;

   // view changes that have not been processed yet are obsolete now
   queue.clear();
   MembershipChange change = new MembershipChange(oldMembers, newMembers);
   if (log.isTraceEnabled()) log.trace("Enqueueing " + change + " for async processing");
   try
   {
      queue.put(change);
   }
   catch (InterruptedException e)
   {
      log.warn("Caught interrupted exception trying to enqueue a view change event", e);
   }
}
/**
* Called by the TreeCacheListener when a
* view change is detected. Used to find new buddies if
* existing buddies have died or if new members to the cluster
* have been added. Makes use of the BuddyLocator and then
* makes RPC calls to remote nodes to assign/remove buddies.
* <p/>
* Candidates returned by the locator that are not in the channel's current view are removed from the
* membership and the locator is consulted once more. Buddies that are no longer selected are removed;
* if at least one selected buddy is new, buddies are (re)assigned. When the group changed, the runtime
* configuration is updated and a buddy group change notification is fired.
*
* @param members the current cluster members; copied before use, so the caller's list is not modified
* @throws CacheException propagated from <code>addBuddies</code>
*/
private void reassignBuddies(List<Address> members) throws CacheException
{
List<Address> membership = new ArrayList<Address>(members); // defensive copy
if (log.isDebugEnabled())
{
log.debug("Data owner address " + cache.getLocalAddress());
log.debug("Entering updateGroup. Current group: " + buddyGroup + ". Current View membership: " + membership);
}
// ask the locator which buddies this data owner should have for the given membership
List<Address> newBuddies = buddyLocator.locateBuddies(buddyPool, membership, buddyGroup.getDataOwner());
List<Address> unreachableBuddies;
if (!(unreachableBuddies = checkBuddyStatus(newBuddies)).isEmpty())
{
// some of the new buddies are unreachable. Ditch them, try the algo again.
// (the second result is not checked against the view again)
membership.removeAll(unreachableBuddies);
newBuddies = buddyLocator.locateBuddies(buddyPool, membership, buddyGroup.getDataOwner());
}
// buddies that were selected but are not in the current group yet
List<Address> uninitialisedBuddies = new ArrayList<Address>();
List<Address> originalBuddies = buddyGroup.getBuddies();
for (Address newBuddy : newBuddies)
{
if (!originalBuddies.contains(newBuddy))
{
uninitialisedBuddies.add(newBuddy);
}
}
List<Address> obsoleteBuddies = new ArrayList<Address>();
// find obsolete buddies
for (Address origBuddy : originalBuddies)
{
if (!newBuddies.contains(origBuddy))
{
obsoleteBuddies.add(origBuddy);
}
}
// Update buddy list
// computed before the group is modified, because the modifications below change what getBuddies() reports
boolean buddyGroupMutated = !obsoleteBuddies.isEmpty() || !uninitialisedBuddies.isEmpty();
if (!obsoleteBuddies.isEmpty())
{
removeFromGroup(obsoleteBuddies);
}
else
{
log.trace("No obsolete buddies found, nothing to announce.");
}
if (!uninitialisedBuddies.isEmpty())
{
// NOTE(review): the complete new buddy list is passed here, not just uninitialisedBuddies, so buddies
// that were already part of the group receive the assignment call again - confirm this is intended
addBuddies(newBuddies);
}
else
{
log.trace("No uninitialized buddies found, nothing to announce.");
}
if (buddyGroupMutated)
{
if (log.isInfoEnabled()) log.info("Buddy group members have changed. New buddy group: " + buddyGroup);
configuration.getRuntimeConfig().setBuddyGroup(buddyGroup);
notifier.notifyBuddyGroupChange(buddyGroup, false);
}
else
log.debug("Nothing has changed; new buddy list is identical to the old one.");
}
/**
* Checks the given addresses against the channel's current JGroups view.
*
* @param members the addresses to check
* @return the addresses from <code>members</code> that are not contained in the current view;
*         empty if all of them are
*/
private List<Address> checkBuddyStatus(List<Address> members)
{
   View currentView = configuration.getRuntimeConfig().getChannel().getView();
   List<Address> deadBuddies = new LinkedList<Address>();
   for (Address candidate : members)
   {
      if (currentView.containsMember(candidate)) continue;
      deadBuddies.add(candidate);
   }
   return deadBuddies;
}
// -------------- methods to be called by the tree cache --------------------
/**
* Called by CacheImpl._remoteAnnounceBuddyPoolName(Address address, String buddyPoolName)
* when a cache instance announces which buddy pool it belongs to.
* <p/>
* The announcement is recorded - in {@link #buddyPool} when a pool name is given, in
* {@link #nullBuddyPool} otherwise - and threads waiting for buddy pool info are woken up.
*
* @param address  the announcing instance
* @param poolName its buddy pool name, or null if it has none
*/
public void handlePoolNameBroadcast(Address address, String poolName)
{
   if (log.isDebugEnabled())
   {
      String self = "BuddyManager@" + Integer.toHexString(hashCode());
      log.debug(self + ": received announcement that cache instance " + address + " is in buddy pool " + poolName);
   }

   if (poolName == null)
   {
      synchronized (nullBuddyPool)
      {
         if (!nullBuddyPool.contains(address)) nullBuddyPool.add(address);
      }
   }
   else
   {
      buddyPool.put(address, poolName);
   }

   // wake up any view change thread that is waiting for buddy pool info
   synchronized (poolInfoNotifierLock)
   {
      log.trace("Notifying any waiting view change threads that we have received buddy pool info.");
      receivedBuddyInfo = true;
      poolInfoNotifierLock.notifyAll();
   }
}
/**
* Called by CacheImpl._remoteRemoveFromBuddyGroup(String groupName)
* when a method call for this is received from a remote cache.
* <p/>
* Forgets the named buddy group and removes its backup subtree with a cache-mode-local call.
* A failure to remove the backup data is logged, not propagated.
*
* @param groupName name of the buddy group this instance should leave
* @throws BuddyNotInitException if this buddy manager has not finished initialising
*/
public void handleRemoveFromBuddyGroup(String groupName) throws BuddyNotInitException
{
   boolean initialised;
   try
   {
      // zero timeout: only tests whether the latch has been released, never waits
      initialised = initialisationLatch.await(0, TimeUnit.NANOSECONDS);
   }
   catch (InterruptedException e)
   {
      log.debug("Caught InterruptedException", e);
      // an interrupted check is not treated as "not initialised"
      initialised = true;
   }
   if (!initialised)
   {
      throw new BuddyNotInitException("Not yet initialised");
   }

   if (log.isInfoEnabled()) log.info("Removing self from buddy group " + groupName);
   buddyGroupsIParticipateIn.remove(groupName);

   // remove backup data for this group
   if (log.isInfoEnabled()) log.info("Removing backup data for group " + groupName);
   try
   {
      // should be a LOCAL call.
      cache.getInvocationContext().getOptionOverrides().setCacheModeLocal(true);
      Fqn backupRoot = new Fqn(BUDDY_BACKUP_SUBTREE_FQN, groupName);
      cache.removeNode(backupRoot);
   }
   catch (CacheException e)
   {
      log.error("Unable to remove backup data for group " + groupName, e);
   }
   finally
   {
      cache.getInvocationContext().getOptionOverrides().setCacheModeLocal(false);
   }
}
/**
* Called by CacheImpl._remoteAssignToBuddyGroup(BuddyGroup g) when a method
* call for this is received from a remote cache.
* <p/>
* Records the group as one this instance backs up. If no state was sent, only the backup node for the
* group is created. Otherwise every state entry whose Fqn is not in an inactive region is integrated
* below the group's backup subtree; a failure to integrate one entry is logged and does not stop the
* remaining entries from being processed.
*
* @param newGroup the buddy group
* @param state    Map<Fqn, byte[]> of any state from the DataOwner. Cannot
*                 be <code>null</code>.
* @throws BuddyNotInitException if this buddy manager has not finished initialising
* @throws Exception             if closing a state input stream fails
*/
public void handleAssignToBuddyGroup(BuddyGroup newGroup, Map<Fqn, byte[]> state) throws Exception
{
try
{
// zero timeout: only tests whether the latch has been released, never waits
if (!initialisationLatch.await(0, TimeUnit.NANOSECONDS))
{
if (log.isDebugEnabled())
log.debug("Local buddy mamanger not initialized, rejecting assign call " + newGroup);
throw new BuddyNotInitException("Not yet initialised");
}
}
catch (InterruptedException e)
{
// an interrupted check is not treated as "not initialised"; processing continues
log.debug("Caught InterruptedException", e);
}
if (log.isInfoEnabled()) log.info("Assigning self to buddy group " + newGroup);
buddyGroupsIParticipateIn.put(newGroup.getGroupName(), newGroup);
// Integrate state transfer from the data owner of the buddy group
Fqn integrationBase = new Fqn(BuddyManager.BUDDY_BACKUP_SUBTREE_FQN,
newGroup.getGroupName());
if (state.isEmpty())
{
if (configuredToFetchState())
log.info("Data owner has no state to set, even though buddy is configured to accept state. Assuming there is no data on the data owner.");
// create the backup region anyway
Option o = cache.getInvocationContext().getOptionOverrides();
o.setSkipCacheStatusCheck(true);
// NOTE(review): 'root' is only needed by the commented-out addChild() call below; the option overrides
// are fetched again right after this lookup, presumably because it may reset them - confirm before removing
Node root = cache.getRoot();
o = cache.getInvocationContext().getOptionOverrides();
o.setCacheModeLocal(true);
o.setSkipCacheStatusCheck(true);
// FIXME -- restore the next line and remove the following one
// when JBCACHE-1265 is fixed
// root.addChild(new Fqn<String>(BUDDY_BACKUP_SUBTREE, newGroup.getGroupName()));
cache.put(new Fqn<String>(BUDDY_BACKUP_SUBTREE, newGroup.getGroupName()), null);
}
else
{
for (Map.Entry<Fqn, byte[]> entry : state.entrySet())
{
Fqn fqn = entry.getKey();
// state belonging to inactive regions is skipped
if (!regionManager.isInactive(fqn))
{
//ClassLoader cl = (marshaller == null) ? null : marshaller.getClassLoader(fqnS);
// the state is integrated under <backup subtree>/<group name>/<original fqn>
Fqn integrationRoot = new Fqn(integrationBase, fqn);
byte[] stateBuffer = entry.getValue();
MarshalledValueInputStream in = null;
try
{
ByteArrayInputStream bais = new ByteArrayInputStream(stateBuffer);
in = new MarshalledValueInputStream(bais);
//stateMgr.setState(in, integrationRoot, cl);
stateTransferManager.setState(in, integrationRoot);
}
catch (Throwable t)
{
if (t instanceof CacheException)
{
// expected/common and can happen due to inactive regions and so on
log.debug(t);
}
else
{
//something has gone wrong
log.error("State for fqn " + fqn
+ " could not be transferred to a buddy at "
+ cache.getLocalAddress(), t);
}
}
finally
{
if (in != null)
{
in.close();
}
}
}
}
}
}
/**
* Lists the data owners of all buddy groups in which this node takes part as a backup node.
*
* @return a new list holding the data owner address of every group in {@link #buddyGroupsIParticipateIn}
*/
public List<Address> getBackupDataOwners()
{
   List<Address> owners = new ArrayList<Address>();
   Iterator<BuddyGroup> groups = buddyGroupsIParticipateIn.values().iterator();
   while (groups.hasNext())
   {
      owners.add(groups.next().getDataOwner());
   }
   return owners;
}
// -------------- static util methods ------------------
/**
* Builds the backup Fqn for some data, given the address of the node that owns the data. The buddy
* group name is derived from the address with {@link #getGroupNameFromAddress(Object)}.
*
* @param dataOwnerAddress the JGroups {@link org.jgroups.Address} of the data owner
* @param origFqn          the original Fqn
* @return a backup Fqn
*/
public static Fqn getBackupFqn(Address dataOwnerAddress, Fqn origFqn)
{
   String groupName = getGroupNameFromAddress(dataOwnerAddress);
   return getBackupFqn(groupName, origFqn);
}
/**
* Builds the backup Fqn for some data, given the buddy group name. The result consists of the backup
* subtree element, the group name and then all elements of the original Fqn.
*
* @param buddyGroupName the buddy group name
* @param origFqn        the original Fqn
* @return a backup Fqn
* @throws CacheException if <code>origFqn</code> already is a backup Fqn
*/
public static Fqn getBackupFqn(String buddyGroupName, Fqn origFqn)
{
   if (isBackupFqn(origFqn))
   {
      throw new CacheException("Cannot make a backup Fqn from a backup Fqn! Attempting to create a backup of " + origFqn);
   }
   // two leading elements (subtree marker + group name) plus the original path
   List<Object> path = new ArrayList<Object>(origFqn.size() + 2);
   path.add(BUDDY_BACKUP_SUBTREE);
   path.add(buddyGroupName);
   path.addAll(origFqn.peekElements());
   return new Fqn(path, true);
}
/**
* Builds the backup Fqn for some data, given the backup subtree of the buddy group in question.
* The group name is taken from the second element of <code>buddyGroupRoot</code>.
*
* @param buddyGroupRoot the subtree under which data for a particular buddy is backed up
* @param origFqn        the original Fqn
* @return a backup Fqn
* @throws CacheException if <code>origFqn</code> already is a backup Fqn
*/
public static Fqn getBackupFqn(Fqn buddyGroupRoot, Fqn origFqn)
{
   if (isBackupFqn(origFqn))
   {
      throw new CacheException("Cannot make a backup Fqn from a backup Fqn! Attempting to create a backup of " + origFqn);
   }
   List<Object> path = new ArrayList<Object>(origFqn.size() + 2);
   path.add(BUDDY_BACKUP_SUBTREE);
   // element 1 of the group root is the group name (element 0 being the subtree marker)
   path.add(buddyGroupRoot.get(1));
   path.addAll(origFqn.peekElements());
   return new Fqn(path, true);
}
/**
* Tells whether an Fqn lies in the buddy backup subtree, i.e. whether it contains the
* {@link #BUDDY_BACKUP_SUBTREE} element.
*
* @param name the Fqn to test; may be null
* @return true if <code>name</code> is not null and contains the backup subtree element
*/
public static boolean isBackupFqn(Fqn name)
{
   if (name == null)
   {
      return false;
   }
   return name.hasElement(BuddyManager.BUDDY_BACKUP_SUBTREE);
}
// -------------- methods to be called by the BaseRPCInterceptor --------------------
/**
* Returns a list of buddies for which this instance is Data Owner.
* List excludes self. Used by the BaseRPCInterceptor when deciding
* who to replicate to.
*
* @return the buddies of this instance's own {@link #buddyGroup}, exactly as returned by the group
*         (no copy is made here)
*/
public List<Address> getBuddyAddresses()
{
return buddyGroup.getBuddies();
}
/**
* Introspects method call for Fqns and changes them such that they
* are under the current buddy group's backup subtree
* (e.g., /_buddy_backup_/my_host:7890/) rather than the root (/).
* Called by BaseRPCInterceptor to transform method calls before broadcasting.
* <p/>
* Fqns that are direct arguments are transformed unless the call is a data gravitation cleanup call.
*
* @param call the call to transform; must not be null
* @return a transformed copy of the call, or the call itself if nothing had to be transformed
*/
public MethodCall transformFqns(MethodCall call)
{
   boolean transformForCurrentCall = call.getMethodId() != MethodDeclarations.dataGravitationCleanupMethod_id;
   return transformFqns(call, transformForCurrentCall);
}
/**
* Creates a copy of the given call whose arguments have been transformed by <code>handleArgs</code>.
* Null calls, calls without arguments and commit calls are returned as they are.
*
* @param call                    the call to transform; may be null
* @param transformForCurrentCall whether Fqns that are direct arguments of this call are moved to the
*                                backup subtree
* @return the transformed copy, or <code>call</code> itself if it was left untouched
*/
public MethodCall transformFqns(MethodCall call, boolean transformForCurrentCall)
{
   if (call == null || call.getArgs() == null || call.getMethodId() == MethodDeclarations.commitMethod_id)
   {
      return call;
   }
   // work on a copy of the argument array so that the original call stays untouched
   MethodCall transformed = MethodCallFactory.create(call.getMethodId(), call.getArgs().clone());
   handleArgs(transformed.getArgs(), transformForCurrentCall);
   return transformed;
}
// -------------- internal helper methods --------------------
/**
* Removes the given buddies from this instance's buddy group and sends them a
* remove-from-buddy-group call for this group.
*
* @param buddies the obsolete buddies
* @throws BuddyNotInitException if a buddy still reports that it is not initialised on the last attempt
*/
private void removeFromGroup(List<Address> buddies)
{
   if (log.isDebugEnabled())
   {
      log.debug("Removing obsolete buddies from buddy group [" + buddyGroup.getGroupName() + "]. Obsolete buddies are " + buddies);
   }
   buddyGroup.removeBuddies(buddies);
   // now broadcast a message to the removed buddies.
   MethodCall membershipCall = MethodCallFactory.create(MethodDeclarations.remoteRemoveFromBuddyGroupMethod_id, buddyGroup.getGroupName());
   notifyBuddiesWithRetry(buddies, membershipCall);
   log.trace("removeFromGroup notification complete");
}
/**
* Adds the given buddies to this instance's buddy group and sends them an assign-to-buddy-group call
* that carries the group together with the state they should integrate.
* <p/>
* State is only collected when the cache is configured to fetch state. With region based marshalling
* the state of every marshalling region is collected; if there are no such regions, the root state is
* collected unless the cache is configured to be inactive on startup. Without region based marshalling
* the root state is collected.
*
* @param buddies the buddies to assign
* @throws CacheException if state cannot be acquired, or if a buddy still reports that it is not
*                        initialised on the last attempt
*/
private void addBuddies(List<Address> buddies) throws CacheException
{
   // this check is redundant - if buddies is empty this method will not be called. - Manik
   // if (buddies.size() == 0)
   // return;
   if (log.isDebugEnabled())
   {
      log.debug("Assigning new buddies to buddy group [" + buddyGroup.getGroupName() + "]. New buddies are " + buddies);
   }
   buddyGroup.addBuddies(buddies);
   // Create the state transfer map
   Map<Fqn, byte[]> stateMap = new HashMap<Fqn, byte[]>();
   if (configuredToFetchState())
   {
      byte[] state;
      if (configuration.isUseRegionBasedMarshalling())
      {
         Collection<Region> regions = regionManager.getAllRegions(Region.Type.MARSHALLING);
         if (regions.size() > 0)
         {
            for (Region r : regions)
            {
               Fqn f = r.getFqn();
               state = acquireState(f);
               if (state != null)
               {
                  stateMap.put(f, state);
               }
            }
         }
         else if (!configuration.isInactiveOnStartup())
         {
            // No regions defined; try the root
            state = acquireState(Fqn.ROOT);
            if (state != null)
            {
               stateMap.put(Fqn.ROOT, state);
            }
         }
      }
      else
      {
         state = acquireState(Fqn.ROOT);
         if (state != null)
         {
            stateMap.put(Fqn.ROOT, state);
         }
      }
   }
   // now broadcast a message to the newly assigned buddies.
   MethodCall membershipCall = MethodCallFactory.create(MethodDeclarations.remoteAssignToBuddyGroupMethod_id, buddyGroup, stateMap);
   notifyBuddiesWithRetry(buddies, membershipCall);
   log.trace("addToGroup notification complete");
}
/**
* Sends a buddy group membership call synchronously to the given buddies, making at most
* {@link #UNINIT_BUDDIES_RETRIES} attempts.
* <p/>
* If an attempt fails because a buddy is not initialised yet, the thread naps before the next attempt;
* if that happens on the last attempt, a {@link BuddyNotInitException} is thrown. Any other failure is
* logged and the next attempt follows immediately.
*
* @param buddies        the recipients of the call
* @param membershipCall the call to make
*/
private void notifyBuddiesWithRetry(List<Address> buddies, MethodCall membershipCall)
{
   // index of the next nap time; only advanced when a nap is actually taken
   int napIndex = 0;
   for (int attempt = 1; attempt <= UNINIT_BUDDIES_RETRIES; attempt++)
   {
      try
      {
         makeRemoteCall(buddies, membershipCall, true);
         return;
      }
      catch (Exception e)
      {
         boolean buddyNotInitialised = e instanceof BuddyNotInitException || e.getCause() instanceof BuddyNotInitException;
         if (!buddyNotInitialised)
         {
            log.error("Unable to communicate with Buddy for some reason", e);
            continue;
         }
         if (attempt == UNINIT_BUDDIES_RETRIES)
         {
            throw new BuddyNotInitException("Unable to contact buddy after " + UNINIT_BUDDIES_RETRIES + " retries");
         }
         log.info("One of the buddies have not been initialised. Will retry after a short nap.");
         try
         {
            Thread.sleep(UNINIT_BUDDIES_RETRY_NAPTIME[napIndex++]);
         }
         catch (InterruptedException e1)
         {
            // the nap is cut short; the next attempt is made right away
            log.trace("Thread interrupted while sleeping/waiting for a retry", e1);
         }
      }
   }
}
/**
* Tells whether state should be fetched for buddies: either in-memory state fetching is configured,
* or a cache loader manager exists and is set to fetch persistent state.
*/
private boolean configuredToFetchState()
{
   if (configuration.isFetchInMemoryState())
   {
      return true;
   }
   if (cache.getCacheLoaderManager() == null)
   {
      return false;
   }
   return cache.getCacheLoaderManager().isFetchPersistentState();
}
/**
* Generates the state below the given Fqn, retrying with progressively longer timeouts
* (400, 800 and 1600 ms) as long as the attempt fails with a {@link TimeoutException}.
* Lock acquisition is forced on the last attempt only.
*
* @param fqn root of the subtree whose state is wanted
* @return the generated state
* @throws CacheException if the last attempt times out as well, or if state generation fails with
*                        any other exception
*/
private byte[] acquireState(Fqn fqn) throws CacheException
{
   final long[] lockTimeouts = {400, 800, 1600};
   final int lastAttempt = lockTimeouts.length - 1;
   final boolean traceEnabled = log.isTraceEnabled();
   TimeoutException lastTimeout = null;

   int attempt = 0;
   while (attempt <= lastAttempt)
   {
      lastTimeout = null;
      try
      {
         byte[] acquired = generateState(fqn, lockTimeouts[attempt], attempt == lastAttempt, false);
         if (log.isDebugEnabled())
         {
            log.debug("acquireState(): got state");
         }
         return acquired;
      }
      catch (TimeoutException timedOut)
      {
         // remember the timeout and move on to the next, longer one
         lastTimeout = timedOut;
         if (traceEnabled)
         {
            log.trace("acquireState(): got a TimeoutException");
         }
      }
      catch (Exception e)
      {
         throw new CacheException("Error acquiring state", e);
      }
      catch (Throwable th)
      {
         throw new RuntimeException(th);
      }
      attempt++;
   }

   // a timeout on the final try is a failure condition
   if (lastTimeout != null)
   {
      throw new CacheException("acquireState(): Failed getting state due to timeout",
            lastTimeout);
   }
   if (log.isDebugEnabled())
   {
      log.debug("acquireState(): Unable to give state");
   }
   return null;
}
/**
* Returns the state for the portion of the cache named by <code>fqn</code>.
* <p/>
* The state is written by the {@link StateTransferManager} to a marshalling stream on top of an
* in-memory buffer, and the raw backing buffer is returned.
* NOTE(review): being the raw buffer, the returned array is presumably longer than the number of bytes
* actually written - confirm against ExposedByteArrayOutputStream.
*
* @param fqn            Fqn indicating the uppermost node in the
*                       portion of the cache whose state should be returned.
* @param timeout        max number of ms this method should wait to acquire
*                       a read lock on the nodes being transferred
* @param force          if a read lock cannot be acquired after
*                       <code>timeout</code> ms, should the lock acquisition
*                       be forced, and any existing transactions holding locks
*                       on the nodes be rolled back? <strong>NOTE:</strong>
*                       In release 1.2.4, this parameter has no effect.
* @param suppressErrors should any Throwable thrown be suppressed?
* @return the raw buffer holding the generated state
* @throws Throwable whatever state generation or flushing the stream throws
*/
public byte[] generateState(Fqn fqn, long timeout, boolean force, boolean suppressErrors) throws Throwable
{
   MarshalledValueOutputStream out = null;
   byte[] result = null;
   try
   {
      ExposedByteArrayOutputStream baos = new ExposedByteArrayOutputStream(16 * 1024);
      out = new MarshalledValueOutputStream(baos);
      stateTransferManager.getState(out, fqn, timeout, force, suppressErrors);
      // Push anything the object stream still buffers into baos BEFORE taking the buffer. If data were
      // only written when the stream is closed below, baos could have to grow, which would leave
      // 'result' pointing at a stale, incomplete array.
      out.flush();
      result = baos.getRawBuffer();
   }
   finally
   {
      Util.close(out);
   }
   return result;
}
/**
* Announces this instance's buddy pool membership without naming specific recipients
* (null is passed as the recipient list).
*/
private void broadcastBuddyPoolMembership()
{
   List<Address> everyone = null;
   broadcastBuddyPoolMembership(everyone);
}
/**
* Announces this instance's buddy pool membership - data owner address plus configured pool name - to
* the given recipients with a synchronous remote call. Failures are logged, not propagated.
*
* @param recipients the instances to inform; may be null
*/
private void broadcastBuddyPoolMembership(List<Address> recipients)
{
   // broadcast to other caches
   if (log.isDebugEnabled())
   {
      log.debug("Instance " + buddyGroup.getDataOwner() + " broadcasting membership in buddy pool " + config.getBuddyPoolName() + " to recipients " + recipients);
   }

   MethodCall announcement = MethodCallFactory.create(MethodDeclarations.remoteAnnounceBuddyPoolNameMethod_id, buddyGroup.getDataOwner(), config.getBuddyPoolName());
   try
   {
      makeRemoteCall(recipients, announcement, true);
   }
   catch (Exception e)
   {
      log.error("Problems broadcasting buddy pool membership info to cluster", e);
   }
}
/**
* Makes a remote call to the given recipients through the {@link RPCManager}, using the buddy
* communication timeout.
* <p/>
* Recipients that are not members of the cluster are left out. The filtering is done on a copy, so the
* caller's list is never modified (and may be unmodifiable). A null recipient list is passed on as null.
*
* @param recipients the intended recipients; may be null
* @param call       the call to make
* @param sync       whether the call is made synchronously
* @throws Exception whatever the RPCManager throws
*/
private void makeRemoteCall(List<Address> recipients, MethodCall call, boolean sync) throws Exception
{
   List<Address> targets = null;
   if (recipients != null)
   {
      // keep only recipients that are cluster members
      List<Address> members = cache.getMembers();
      targets = new ArrayList<Address>(recipients.size());
      for (Address recipient : recipients)
      {
         if (members.contains(recipient))
         {
            targets.add(recipient);
         }
      }
   }
   rpcManager.callRemoteMethods(targets, call, sync, true, config.getBuddyCommunicationTimeout(), false);
}
/**
* Rewrites, in place, the arguments of a method call so that they refer to the backup subtree.
* <ul>
* <li>a nested {@link MethodCall} is replaced by its transformed form;</li>
* <li>a List is replaced by a new list holding its elements after they were processed recursively;</li>
* <li>an Fqn is replaced by its backup Fqn, but only if <code>transformForCurrentCall</code> is set.</li>
* </ul>
*
* @param args                    the argument array to rewrite
* @param transformForCurrentCall whether Fqn arguments are to be replaced
*/
private void handleArgs(Object[] args, boolean transformForCurrentCall)
{
   for (int idx = 0; idx < args.length; idx++)
   {
      if (args[idx] instanceof MethodCall)
      {
         MethodCall nested = (MethodCall) args[idx];
         boolean transformNested = nested.getMethodId() != MethodDeclarations.dataGravitationCleanupMethod_id;
         args[idx] = transformFqns(nested, transformNested);
      }
      // instanceof is false for null, so no separate null check is needed
      if (args[idx] instanceof List)
      {
         Object[] elements = ((List) args[idx]).toArray();
         handleArgs(elements, transformForCurrentCall);
         args[idx] = new ArrayList<Object>(Arrays.asList(elements));
      }
      if (args[idx] instanceof Fqn)
      {
         Fqn original = (Fqn) args[idx];
         if (transformForCurrentCall) args[idx] = getBackupFqn(original);
      }
   }
}
/**
* Builds the backup Fqn of the given Fqn under this instance's own buddy group, i.e. assuming the
* current instance is the data owner. If there is no buddy group, or it has no name yet, the literal
* group name "null" is used.
*
* @param originalFqn the Fqn to back up
* @return backup fqn
*/
public Fqn getBackupFqn(Fqn originalFqn)
{
   String groupName = "null";
   if (buddyGroup != null && buddyGroup.getGroupName() != null)
   {
      groupName = buddyGroup.getGroupName();
   }
   return getBackupFqn(groupName, originalFqn);
}
/**
* Reverses the backup transformation: for a backup Fqn the first two elements are dropped,
* any other Fqn is returned as it is.
*
* @param fqn the Fqn to convert
* @return the Fqn without its two leading backup elements, or <code>fqn</code> itself if it is not a
*         backup Fqn
*/
public static Fqn getActualFqn(Fqn fqn)
{
   // strip the first 2 elements of backup Fqns only
   return isBackupFqn(fqn) ? fqn.getSubFqn(2, fqn.size()) : fqn;
}
/**
* Asynchronous thread that deals with handling view changes placed on a queue
*/
private class AsyncViewChangeHandlerThread implements Runnable
{
private Thread t;
private boolean isRunning = true;
public void start()
{
if (t == null || !t.isAlive())
{
t = new Thread(this);
t.setName("AsyncViewChangeHandlerThread," + cache.getLocalAddress());
t.setDaemon(true);
t.start();
}
}
public void run()
{
log.trace("Started");
// don't start this thread until the Buddy Manager has initialised as it cocks things up.
try
{
initialisationLatch.await();
}
catch (InterruptedException e)
{
log.debug("Caught InterruptedException", e);
}
while (!Thread.interrupted() && isRunning)
{
try
{
handleEnqueuedViewChange();
}
catch (InterruptedException e)
{
break;
}
catch (Throwable t)
{
// Don't let the thread die
log.error("Caught exception handling view change", t);
}
}
log.trace("Exiting run()");
}
private void handleEnqueuedViewChange() throws Exception
{
log.trace("Waiting for enqueued view change events");
MembershipChange members = queue.take();
if (members == STOP_NOTIFIER)
{
log.trace("Caught stop notifier, time to go home.");
// time to go home
isRunning = false;
return;
}
// there is a strange case where JGroups issues view changes and just includes self in new views, and then
// quickly corrects it. Happens intermittently on some unit tests. If this is such a case, please ignore.
if (members.newMembers.size() == 1 && members.newMembers.get(0).equals(cache.getLocalAddress()))
{
log.info("Ignoring membership change event since it only contains self.");
return;
}
broadcastPoolMembership(members);
boolean rebroadcast = false;
// make sure new buddies have broadcast their pool memberships.
while (!buddyPoolInfoAvailable(members.newMembers))
{
rebroadcast = true;
synchronized (poolInfoNotifierLock)
{
log.trace("Not received necessary buddy pool info for all new members yet; waiting on poolInfoNotifierLock.");
while (!receivedBuddyInfo)
poolInfoNotifierLock.wait();
log.trace("Notified!!");
receivedBuddyInfo = false;
}
}
if (rebroadcast) broadcastPoolMembership(members);
// always refresh buddy list.
reassignBuddies(members.newMembers);
}
private void broadcastPoolMembership(MembershipChange members)
{
log.trace("Broadcasting pool membership details, triggered by view change.");
if (members.oldMembers == null)
{
broadcastBuddyPoolMembership();
}
else
{
List<Address> delta = new ArrayList<Address>();
delta.addAll(members.newMembers);
delta.removeAll(members.oldMembers);
broadcastBuddyPoolMembership(delta);
}
}
private boolean buddyPoolInfoAvailable(List<Address> newMembers)
{
boolean infoReceived = true;
for (Address address : newMembers)
{
// make sure no one is concurrently writing to nullBuddyPool.
synchronized (nullBuddyPool)
{
// log.trace("Testing on node " + buddyGroup.getDataOwner() + " for candidate " + address);
// log.trace("Is me? " + address.equals(cache.getLocalAddress()));
// log.trace("is in bP? " + buddyPool.keySet().contains(address));
// log.trace("is in nBP? " + nullBuddyPool.contains(address));
infoReceived = infoReceived && (address.equals(cache.getLocalAddress()) || buddyPool.keySet().contains(address) || nullBuddyPool.contains(address));
}
}
if (log.isTraceEnabled())
{
log.trace(buddyGroup.getDataOwner() + " received buddy pool info for new members " + newMembers + "? " + infoReceived);
}
return infoReceived;
}
public void stop()
{
if (t != null) t.interrupt();
}
}
/**
* Cache listener that turns view changes into queued membership changes for asynchronous handling.
* The previous membership is tracked only when a buddy pool name is configured.
*/
@CacheListener
public class ViewChangeListener
{
   private Vector<Address> oldMembers;

   @ViewChanged
   public void handleViewChange(ViewChangedEvent event)
   {
      View newView = event.getNewView();
      if (log.isTraceEnabled())
      {
         log.trace("BuddyManager CacheListener - got view change with new view " + newView);
      }
      Vector<Address> newMembers = newView.getMembers();

      // the whole 'oldMembers' concept is only used for buddy pool announcements.
      if (config.getBuddyPoolName() == null)
      {
         enqueueViewChange(null, newMembers);
         return;
      }

      // hand snapshots to the queue; oldMembers is overwritten right below
      Vector<Address> previousMembers = (oldMembers == null) ? null : new Vector<Address>(oldMembers);
      enqueueViewChange(previousMembers, new Vector<Address>(newMembers));

      if (oldMembers == null)
      {
         oldMembers = new Vector<Address>();
      }
      else
      {
         oldMembers.clear();
      }
      oldMembers.addAll(newMembers);
   }
}
}