@Test
public void test() throws Exception {
startDefaultMaster();
final DockerClient dockerClient = getNewDockerClient();
final HeliosClient client = defaultClient();
final AgentMain agent1 = startDefaultAgent(testHost());
// Create a job
final Job job = Job.newBuilder()
.setName(testJobName)
.setVersion(testJobVersion)
.setImage(BUSYBOX)
.setCommand(IDLE_COMMAND)
.setCreatingUser(TEST_USER)
.build();
final JobId jobId = job.getId();
final CreateJobResponse created = client.createJob(job).get();
assertEquals(CreateJobResponse.Status.OK, created.getStatus());
// Wait for agent to come up
awaitHostRegistered(client, testHost(), LONG_WAIT_SECONDS, SECONDS);
awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
// Deploy the job on the agent
final Deployment deployment = Deployment.of(jobId, START);
final JobDeployResponse deployed = client.deploy(deployment, testHost()).get();
assertEquals(JobDeployResponse.Status.OK, deployed.getStatus());
// Wait for the job to run
final TaskStatus firstTaskStatus = awaitJobState(client, testHost(), jobId, RUNNING,
LONG_WAIT_SECONDS, SECONDS);
assertEquals(job, firstTaskStatus.getJob());
assertNotNull(dockerClient.inspectContainer(firstTaskStatus.getContainerId()));
// Stop zookeeper
zk().stop();
// Wait for a while and make sure that the container is still running
Thread.sleep(5000);
assertTrue(
dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());
// Stop the agent
agent1.stopAsync().awaitTerminated();
// Start the agent again
final AgentMain agent2 = startDefaultAgent(testHost());
// Wait for a while and make sure that the same container is still running
Thread.sleep(5000);
assertTrue(
dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());
// Kill the container
dockerClient.killContainer(firstTaskStatus.getContainerId());
assertFalse(
dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());
// Wait for a while and make sure that a new container was spawned
final String firstRestartedContainerId =
Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<String>() {
@Override
public String call() throws Exception {
final List<Container> containers = listContainers(dockerClient, testTag);
return containers.size() == 1 ? containers.get(0).id() : null;
}
});
// Stop the agent
agent2.stopAsync().awaitTerminated();
// Kill the container
dockerClient.killContainer(firstRestartedContainerId);
assertFalse(dockerClient.inspectContainer(firstRestartedContainerId).state().running());
// Start the agent again
startDefaultAgent(testHost());
// Wait for a while and make sure that a new container was spawned
final String secondRestartedContainerId =
Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<String>() {
@Override
public String call() throws Exception {
final List<Container> containers = listContainers(dockerClient, testTag);
return containers.size() == 1 ? containers.get(0).id() : null;
}
});
assertTrue(dockerClient.inspectContainer(secondRestartedContainerId).state().running());
// Start zookeeper
zk().start();
// Verify that the agent is listed as up