Skip to content

Commit

Permalink
Integrated code lifecycle: Add timeout when stopping hanging containe…
Browse files Browse the repository at this point in the history
…rs in edge cases (#8587)
  • Loading branch information
laurenzfb authored May 18, 2024
1 parent c6390bb commit c4e0c9b
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
import java.util.List;
import java.util.Optional;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
Expand Down Expand Up @@ -199,24 +203,44 @@ public void stopContainer(String containerName) {
* @param containerId The ID of the container to stop or kill.
*/
public void stopUnresponsiveContainer(String containerId) {
    // Two worker threads are required: if the stop command hangs, its thread stays blocked,
    // and the kill command needs a free thread to run on instead of queueing behind it.
    ExecutorService executor = Executors.newFixedThreadPool(2);
    Future<Void> stopFuture = null;
    // Remembers whether the calling thread was interrupted so the flag can be restored at the end.
    boolean interrupted = false;
    try {
        log.info("Stopping container with id {}", containerId);

        // Run the stop command on a worker thread so that a hanging Docker call cannot block the caller.
        // The stop command is issued with a timeout of 5 (withTimeout(5)); the caller waits at most 10 seconds for it.
        stopFuture = executor.submit(() -> {
            dockerClient.stopContainerCmd(containerId).withTimeout(5).exec();
            return null; // Callable<Void> must return a value
        });

        stopFuture.get(10, TimeUnit.SECONDS);
    }
    catch (Exception e) {
        interrupted = e instanceof InterruptedException;

        // Exceptions thrown inside the submitted task arrive wrapped in an ExecutionException; unwrap to see the real cause.
        Throwable cause = (e instanceof ExecutionException && e.getCause() != null) ? e.getCause() : e;

        if (cause instanceof NotFoundException || cause instanceof NotModifiedException) {
            log.debug("Container with id {} is already stopped: {}", containerId, cause.getMessage());
        }
        else {
            // Ask the (possibly still running) stop task to abort before falling back to kill.
            if (stopFuture != null) {
                stopFuture.cancel(true);
            }
            // toString() instead of getMessage(): a TimeoutException carries no message and would be logged as "null".
            log.warn("Failed to stop container with id {}. Attempting to kill container: {}", containerId, cause.toString());

            if (killContainerWithTimeout(executor, containerId)) {
                interrupted = true;
            }
        }
    }
    finally {
        // shutdownNow() interrupts tasks that are still running; shutdown() would leave a hanging task untouched.
        executor.shutdownNow();
        if (interrupted) {
            // Restore the interrupt status only now, so it does not abort the kill attempt above.
            Thread.currentThread().interrupt();
        }
    }
}

/**
 * Submits a kill command for the given container to the executor and waits at most 5 seconds for it to complete.
 * Failures are logged and not propagated.
 *
 * @param executor    The executor to run the kill command on; it must have a free worker thread.
 * @param containerId The ID of the container to kill.
 * @return true if the calling thread was interrupted while waiting for the kill command, false otherwise.
 */
private boolean killContainerWithTimeout(ExecutorService executor, String containerId) {
    Future<Void> killFuture = null;
    try {
        killFuture = executor.submit(() -> {
            dockerClient.killContainerCmd(containerId).exec();
            return null; // Callable<Void> must return a value
        });

        killFuture.get(5, TimeUnit.SECONDS);
        return false;
    }
    catch (Exception killException) {
        if (killFuture != null) {
            killFuture.cancel(true);
        }
        Throwable cause = (killException instanceof ExecutionException && killException.getCause() != null) ? killException.getCause() : killException;
        log.warn("Failed to kill container with id {}: {}", containerId, cause.toString());
        return killException instanceof InterruptedException;
    }
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,9 @@ private void finishBuildJobExceptionally(String buildJobId, String containerName
buildLogsMap.appendBuildLogEntry(buildJobId, new BuildLogEntry(ZonedDateTime.now(), msg + "\n" + stackTrace));
log.error(msg);

log.info("Getting ID of running container {}", containerName);
String containerId = buildJobContainerService.getIDOfRunningContainer(containerName);
log.info("Stopping unresponsive container with ID {}", containerId);
if (containerId != null) {
buildJobContainerService.stopUnresponsiveContainer(containerId);
}
Expand Down

0 comments on commit c4e0c9b

Please sign in to comment.