/**
*
*/
package system;
import java.io.IOException;
import java.net.InetAddress;
import java.net.MalformedURLException;
import java.net.UnknownHostException;
import java.rmi.Naming;
import java.rmi.NotBoundException;
import java.rmi.RemoteException;
import java.rmi.registry.LocateRegistry;
import java.rmi.registry.Registry;
import java.rmi.server.UnicastRemoteObject;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Vector;
import java.util.logging.Logger;
import system.Communicator.CommunicatorState;
import system.Worker.WorkerState;
import utility.JPregelLogger;
import api.Vertex;
import exceptions.DataNotFoundException;
import exceptions.IllegalClassException;
import exceptions.IllegalInputException;
import exceptions.IllegalMessageException;
/**
*
* Implementation of interface worker manager.
*
* @author Manasa Chandrasekhar
* @author Kowshik Prakasam
*
*/
public class WorkerManagerImpl extends UnicastRemoteObject implements
WorkerManager, MessageSpooler {
/**
*
*/
private static final long serialVersionUID = -2375569067650804665L;
private static int PORT_NUMBER;
static {
PORT_NUMBER = new Random().nextInt(3000) + 2000;
}
private String id;
private Logger logger;
private static final String LOG_FILE_PREFIX = JPregelConstants.LOG_DIR
+ "workermanager_";
private static final String LOG_FILE_SUFFIX = ".log";
private ManagerToMaster master;
private Map<Integer, Vertex> idVertexMap;
private List<Worker> workers;
private String vertexClassName;
private List<Message> incomingMsgs;
private Communicator aCommunicator;
private int superStep;
private boolean isCheckPoint;
private int numWorkers;
private boolean justRecovered;
public Communicator getCommunicator() {
return aCommunicator;
}
public void setaCommunicator(Communicator aCommunicator) {
this.aCommunicator = aCommunicator;
}
private void initLogger() throws IOException {
this.logger = JPregelLogger.getLogger(this.getId(), LOG_FILE_PREFIX
+ this.getId() + LOG_FILE_SUFFIX);
}
private WorkerManagerImpl() throws IOException {
this.workers = new Vector<Worker>();
this.idVertexMap = new HashMap<Integer, Vertex>();
this.incomingMsgs = new LinkedList<Message>();
this.setId(InetAddress.getLocalHost().getHostName() + "_"
+ WorkerManagerImpl.getRandomChars());
this.aCommunicator = new Communicator(this, this.getId());
this.numWorkers = 1;
this.initLogger();
}
public WorkerManagerImpl(ManagerToMaster master, String vertexClassName)
throws IOException {
this();
this.master = master;
this.vertexClassName = vertexClassName;
}
/**
* @param string
*/
private void setId(String id) {
this.id = id;
}
/*
* (non-Javadoc)
*
* @see system.WorkerManager#getId()
*/
@Override
public String getId() {
return id;
}
public static void main(String[] args) throws IOException,
IllegalClassException {
String masterServer = args[0];
String vertexClassName = args[1];
try {
Class<?> c = Class.forName(vertexClassName);
if (!c.getSuperclass().equals(Vertex.class)) {
throw new IllegalClassException(vertexClassName);
}
} catch (ClassNotFoundException e) {
System.err.println("Client vertex class not found !");
e.printStackTrace();
}
if (System.getSecurityManager() == null) {
System.setSecurityManager(new SecurityManager());
}
try {
System.out.println("Looking up ManagerToMaster service : "
+ masterServer + "/" + ManagerToMaster.SERVICE_NAME);
ManagerToMaster master = (ManagerToMaster) Naming.lookup("//"
+ masterServer + "/" + ManagerToMaster.SERVICE_NAME);
WorkerManagerImpl mgr = new WorkerManagerImpl(master,
vertexClassName);
Registry registry = LocateRegistry.createRegistry(PORT_NUMBER);
registry.rebind(mgr.getId(), mgr);
System.out.println("Worker manager registered as " + mgr.getId()
+ " in port : " + PORT_NUMBER);
master.register(mgr, mgr.getId());
System.out.println("Worker Manager registered to master");
} catch (RemoteException e) {
System.err.println("WorkerManagerImpl Remote exception : ");
e.printStackTrace();
} catch (MalformedURLException e) {
System.err.println("WorkerManagerImpl Malformed exception : ");
e.printStackTrace();
} catch (NotBoundException e) {
System.err.println("WorkerManagerImpl NotBound exception : ");
e.printStackTrace();
}
}
/**
*
* @return A random name made up of exactly three alphabets
*/
public static String getRandomChars() {
char first = (char) ((new Random().nextInt(26)) + 65);
char second = (char) ((new Random().nextInt(26)) + 65);
char third = (char) ((new Random().nextInt(26)) + 65);
return "" + first + second + third;
}
/*
* (non-Javadoc)
*
* @see system.WorkerManager#initialize(java.util.List)
*/
@Override
public void initialize(List<Integer> partitionNumbers, int numWorkers,
int partitionSize, int numVertices) throws RemoteException {
logger.info("Received partitionNumbers : " + partitionNumbers);
this.setNumWorkers(numWorkers);
// Set datalocator in communicator
DataLocator aDataLocator = null;
try {
aDataLocator = DataLocator.getDataLocator(partitionSize);
} catch (IOException e) {
String msg = "RemoteException in DataLocator in worker manager : "
+ this.getId();
logger.severe(msg);
e.printStackTrace();
throw new RemoteException(msg, e);
}
this.getCommunicator().setDataLocator(aDataLocator);
// assign partitions to workers
List<List<Integer>> assignedPartitions = this
.assignPartitions(partitionNumbers);
// for each assigned partition, initialize the worker
for (List<Integer> threadPartition : assignedPartitions) {
try {
Worker aWkr = new Worker(threadPartition, partitionSize, this,
vertexClassName, this.getCommunicator(), numVertices);
this.getCommunicator().registerWorker(aWkr);
this.idVertexMap.putAll(aWkr.getVertices());
logger.info("Cached all vertices of this worker. Size of id->vertex map is : "
+ this.idVertexMap.size());
this.workers.add(aWkr);
logger.info("Added new worker : " + aWkr);
} catch (DataNotFoundException e) {
logger.severe("Unable to read partitions in worker manager : "
+ this.getId());
e.printStackTrace();
} catch (IOException e) {
logger.severe("Unable to read partitions in worker manager : "
+ this.getId());
e.printStackTrace();
} catch (IllegalInputException e) {
logger.severe("Unable to read partitions in worker manager : "
+ this.getId());
e.printStackTrace();
} catch (InstantiationException e) {
logger.severe("Unable to instantiate client vertex class : "
+ vertexClassName);
e.printStackTrace();
} catch (IllegalAccessException e) {
logger.severe("Unable to instantiate client vertex class : "
+ vertexClassName);
e.printStackTrace();
} catch (ClassNotFoundException e) {
logger.severe("Client vertex class not found : "
+ vertexClassName);
e.printStackTrace();
}
}
logger.info("Initialized worker manager : " + this.getId()
+ "\n\n Workers are : " + workers);
}
/**
* @param b
*/
private synchronized void setRecoveryStep(boolean isRecoveryStep) {
this.justRecovered = isRecoveryStep;
}
private synchronized boolean justRecovered() {
return this.justRecovered;
}
// Returns partition assignments to worker managers
private List<List<Integer>> assignPartitions(List<Integer> partitions) {
Iterator<Integer> it = partitions.iterator();
List<Integer> threadPartitions = new Vector<Integer>();
List<List<Integer>> assignedPartitions = new Vector<List<Integer>>();
int wkrPartitionCount = partitions.size() / this.getNumWorkers();
if (wkrPartitionCount == 0 && this.getNumWorkers() != 0) {
wkrPartitionCount = partitions.size();
}
int assignedWrkrs = 0;
while (it.hasNext() && (assignedWrkrs != this.getNumWorkers())) {
int thisWkrPartitionCount = 0;
threadPartitions = new Vector<Integer>();
while (thisWkrPartitionCount < wkrPartitionCount && it.hasNext()) {
threadPartitions.add(it.next());
thisWkrPartitionCount++;
}
if (thisWkrPartitionCount > 0) {
if (it.hasNext() && assignedWrkrs + 1 == this.getNumWorkers()) {
while (it.hasNext()) {
threadPartitions.add(it.next());
}
}
assignedPartitions.add(threadPartitions);
assignedWrkrs++;
}
}
return assignedPartitions;
}
/**
* @param numWorkers
*/
private void setNumWorkers(int numWorkers) {
this.numWorkers = numWorkers;
}
private int getNumWorkers() {
return this.numWorkers;
}
/*
* (non-Javadoc)
*
* @see system.WorkerManager#executeSuperStep()
*/
@Override
public void beginSuperStep(int superStepNumber, boolean isCheckPoint)
throws RemoteException {
this.superStep = superStepNumber;
this.isCheckPoint = isCheckPoint;
logger.info("Beginning superstep : " + superStepNumber);
// Distribute messages from last superstep
try {
boolean msgDistributed = false;
if (!this.justRecovered()) {
msgDistributed = distributeMessages();
}
this.setRecoveryStep(false);
if (this.isCheckPoint
|| superStep == JPregelConstants.FIRST_SUPERSTEP) {
checkpointData();
}
if (msgDistributed
|| (superStepNumber == JPregelConstants.FIRST_SUPERSTEP)) {
// start communicator
logger.info("Starting communicator");
aCommunicator.setState(Communicator.CommunicatorState.EXECUTE);
logger.info("Set communicator state to EXECUTE");
// start workers
for (int index = 0; index < this.workers.size(); index++) {
Worker aWorker = this.workers.get(index);
aWorker.setSuperStep(superStepNumber);
aWorker.setState(Worker.WorkerState.EXECUTE);
}
} else {
logger.info("No messages in superstep : " + superStepNumber);
endSuperStep();
}
} catch (IllegalMessageException e) {
logger.severe(e.getMessage());
e.printStackTrace();
throw new RemoteException(e.getMessage(), e);
}
}
/**
* @throws IllegalMessageException
*
*/
private boolean distributeMessages() throws IllegalMessageException {
if (!incomingMsgs.isEmpty()) {
for (int index = 0; index < this.incomingMsgs.size(); index++) {
Message msg = this.incomingMsgs.get(index);
Vertex targetVertex = idVertexMap.get(msg.getDestVertexID());
if (targetVertex == null) {
throw new IllegalMessageException(msg, this.getId());
}
targetVertex.queueMessage(msg);
}
this.incomingMsgs.clear();
return true;
}
return false;
}
public void endSuperStep() throws RemoteException {
logger.info("Ending superstep");
master.endSuperStep(this.getId());
}
private void checkpointData() throws RemoteException {
try {
this.saveState();
} catch (IOException e) {
String msg = e.getMessage();
logger.severe(msg);
e.printStackTrace();
throw new RemoteException(msg, e);
} catch (DataNotFoundException e) {
String msg = e.getMessage();
logger.severe(msg);
e.printStackTrace();
throw new RemoteException(msg, e);
}
}
/**
* @throws DataNotFoundException
* @throws IOException
*
*/
private void saveState() throws IOException, DataNotFoundException {
for (int index = 0; index < this.workers.size(); index++) {
Worker aWorker = this.workers.get(index);
aWorker.setSuperStep(this.superStep);
aWorker.saveState();
logger.info("Checkpointed data for worker : " + aWorker.getId());
}
}
/*
* (non-Javadoc)
*
* @see system.VertexMessager#queueMessage(system.Message)
*/
@Override
public synchronized void queueMessage(Message msg) throws RemoteException {
logger.info("In superstep : " + this.superStep
+ ", queued next superstep message : " + msg);
this.incomingMsgs.add(msg);
}
/*
* (non-Javadoc)
*
* @see system.VertexMessaging#getQueueSize()
*/
@Override
public synchronized boolean isQueueEmpty() throws RemoteException {
if (this.incomingMsgs.size() > 0 || this.justRecovered()) {
return false;
}
return true;
}
/*
* (non-Javadoc)
*
* @see system.WorkerManager#getHostName()
*/
@Override
public String getHostInfo() throws RemoteException {
try {
return InetAddress.getLocalHost().getHostName() + ":" + PORT_NUMBER;
} catch (UnknownHostException e) {
String msg = e.getMessage();
logger.severe(msg);
e.printStackTrace();
throw new RemoteException(msg, e);
}
}
/*
* (non-Javadoc)
*
* @see system.WorkerManager#writeSolutions()
*/
@Override
public void writeSolutions() throws RemoteException {
for (int index = 0; index < this.workers.size(); index++) {
Worker aWorker = this.workers.get(index);
try {
aWorker.writeSolutions();
} catch (IOException e) {
String msg = e.getMessage();
logger.severe(msg);
e.printStackTrace();
throw new RemoteException(msg, e);
}
}
}
/*
* (non-Javadoc)
*
* @see system.WorkerManager#isAlive()
*/
@Override
public void isAlive() throws RemoteException {
// does nothing, dummy method to check if host is alive
}
/*
* (non-Javadoc)
*
* @see system.WorkerManager#stopSuperStep()
*/
@Override
public void stopSuperStep() throws RemoteException {
logger.severe("Received STOP signal from Master");
for (int index = 0; index < this.workers.size(); index++) {
Worker aWorker = this.workers.get(index);
aWorker.setState(WorkerState.STOP);
logger.severe("Issued STOP signal to worker : " + aWorker.getId());
}
logger.severe("Issued STOP signal to communicator");
aCommunicator.setState(CommunicatorState.STOP);
this.endSuperStep();
}
/*
* (non-Javadoc)
*
* @see system.WorkerManager#restoreState(int)
*/
@Override
public void restoreState(int lastCheckPoint, List<Integer> partitions)
throws RemoteException {
logger.warning("Restoring to last check point number : "
+ lastCheckPoint);
logger.warning("Received new partitions : " + partitions.toString());
// assign partitions to workers
List<List<Integer>> assignedPartitions = this
.assignPartitions(partitions);
// clear global maps and queues
this.incomingMsgs.clear();
this.idVertexMap.clear();
for (int index = 0; index < this.workers.size(); index++) {
Worker aWorker = this.workers.get(index);
try {
logger.info("Restoring state of worker : " + aWorker.getId());
aWorker.restoreState(lastCheckPoint, partitions);
this.idVertexMap.putAll(aWorker.getVertices());
logger.info("Restored all vertices for this worker. Size of id->vertex map now is : "
+ this.idVertexMap.size());
} catch (IOException e) {
String msg = e.getMessage();
logger.severe(msg);
e.printStackTrace();
throw new RemoteException(msg, e);
} catch (DataNotFoundException e) {
String msg = e.getMessage();
logger.severe(msg);
e.printStackTrace();
throw new RemoteException(msg, e);
} catch (ClassNotFoundException e) {
String msg = e.getMessage();
logger.severe(msg);
e.printStackTrace();
throw new RemoteException(msg, e);
}
}
}
}