Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add hang detection to @QuarkusTest #14507

Merged
merged 1 commit into from
Jan 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package io.quarkus.deployment.configuration;

import java.time.Duration;

import io.quarkus.runtime.annotations.ConfigItem;
import io.quarkus.runtime.annotations.ConfigRoot;

@ConfigRoot
public class TestConfig {

/**
* Configures the hang detection in @QuarkusTest. If no activity happens (i.e. no test callbacks are called) over
* this period then QuarkusTest will dump all threads stack traces, to help diagnose a potential hang.
*
* Note that the initial timeout (before Quarkus has started) will only apply if provided by a system property, as
* it is not possible to read all config sources until Quarkus has booted.
*/
@ConfigItem(defaultValue = "10m")
Duration hangDetectionTimeout;

}
11 changes: 11 additions & 0 deletions docs/src/main/asciidoc/getting-started-testing.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -895,6 +895,17 @@ but it is common to create custom implementations to address specific applicatio
Common cases include starting docker containers using https://www.testcontainers.org/[Testcontainers] (an example of which can be found https://github.com/quarkusio/quarkus-quickstarts/blob/master/kafka-quickstart/src/test/java/org/acme/kafka/KafkaResource.java[here]),
or starting a mock HTTP server using http://wiremock.org/[Wiremock] (an example of which can be found https://github.com/geoand/quarkus-test-demo/blob/master/src/test/java/org/acme/getting/started/country/WiremockCountries.java[here]).

== Hang Detection

`@QuarkusTest` has support for hang detection to help diagnose any unexpected hangs. If no progress is made for a specified
time (i.e. no JUnit callbacks are invoked) then Quarkus will print a stack trace to the console to help diagnose the hang.
The default value for this timeout is 10 minutes.

No further action will be taken, and the tests will continue as normal (generally until CI times out), however the printed
stack traces should help diagnose why the build has failed. You can control this timeout with the
`quarkus.test.hang-detection-timeout` system property (you can also set this in application.properties, but this won't
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, maybe something like "If you set this property in application.properties the value will be ignored and the default value will be used instead. The reason is that the timeout is initialized before the test application starts." or something like that...

be read until Quarkus has started, so the timeout for Quarkus start will be the default of 10 minutes).

== Native Executable Testing

It is also possible to test native executables using `@NativeImageTest`. This supports all the features mentioned in this
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,4 @@ quarkus.native.resources.excludes = **/unwanted.*

quarkus.log.metrics.enabled=true

quarkus.native.additional-build-args =-H:ReflectionConfigurationFiles=reflection-config.json
quarkus.native.additional-build-args =-H:ReflectionConfigurationFiles=reflection-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,16 @@
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.lang.management.ThreadInfo;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.Duration;
import java.time.temporal.ChronoUnit;
import java.util.AbstractMap;
import java.util.ArrayDeque;
import java.util.ArrayList;
Expand All @@ -25,7 +29,11 @@
import java.util.Optional;
import java.util.ServiceLoader;
import java.util.Set;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Consumer;
import java.util.function.Function;
Expand Down Expand Up @@ -73,6 +81,7 @@
import io.quarkus.deployment.builditem.TestClassBeanBuildItem;
import io.quarkus.deployment.builditem.TestClassPredicateBuildItem;
import io.quarkus.runtime.LaunchMode;
import io.quarkus.runtime.configuration.DurationConverter;
import io.quarkus.runtime.configuration.ProfileManager;
import io.quarkus.runtime.test.TestHttpEndpointProvider;
import io.quarkus.test.common.PathTestHelper;
Expand Down Expand Up @@ -102,6 +111,7 @@ public class QuarkusTestExtension

protected static final String TEST_LOCATION = "test-location";
protected static final String TEST_CLASS = "test-class";
public static final String QUARKUS_TEST_HANG_DETECTION_TIMEOUT = "quarkus.test.hang-detection-timeout";

private static boolean failedBoot;

Expand All @@ -124,8 +134,58 @@ public class QuarkusTestExtension
private static DeepClone deepClone;
//needed for @Nested
private static Deque<Class<?>> currentTestClassStack = new ArrayDeque<>();
private static ScheduledExecutorService hangDetectionExecutor;
private static Duration hangTimeout;
private static ScheduledFuture<?> hangTaskKey;
private static final Runnable hangDetectionTask = new Runnable() {

final AtomicBoolean runOnce = new AtomicBoolean();

@Override
public void run() {
if (!runOnce.compareAndSet(false, true)) {
return;
}
System.err.println("@QuarkusTest has detected a hang, as there has been no test activity in " + hangTimeout);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if it would be useful to be able (config?) to dump the stack trace to a file instead?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that this is most likely going to be useful in CI, it is so much easier to just use system err.
If we were to dump it to a file, it would need to be in a location that is then included in the artifacts produced by the CI pipeline

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I'm talking about local builds... but the truth is that you can easily dump the stack manually in this case.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Exactly

System.err.println("To configure this timeout use the " + QUARKUS_TEST_HANG_DETECTION_TIMEOUT + " config property");
System.err.println("A stack track is below to help diagnose the potential hang");
System.err.println("=== Stack Trace ===");
ThreadInfo[] threads = ManagementFactory.getThreadMXBean().dumpAllThreads(true, true);
for (ThreadInfo info : threads) {
if (info == null) {
System.err.println(" Inactive");
continue;
}
Thread.State state = info.getThreadState();
System.err.println("Thread " + info.getThreadName() + ": " + state);
if (state == Thread.State.WAITING) {
System.err.println(" Waiting on " + info.getLockName());
} else if (state == Thread.State.BLOCKED) {
System.err.println(" Blocked on " + info.getLockName());
System.err.println(" Blocked by " + info.getLockOwnerName());
}
System.err.println(" Stack:");
for (StackTraceElement frame : info.getStackTrace()) {
System.err.println(" " + frame.toString());
}
}
System.err.println("=== End Stack Trace ===");
//we only every dump once
}
};

private ExtensionState doJavaStart(ExtensionContext context, Class<? extends QuarkusTestProfile> profile) throws Throwable {
hangDetectionExecutor = Executors.newSingleThreadScheduledExecutor();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we could skip the executor completely if -Dquarkus.test.hang-detection-timeout=-1?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That make sense, but in what scenario do you envision doing that?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just in case, if there are any problems with the executor/cancelling the task or something like that...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess it can't hurt :)

String time = "10m";
//config is not established yet
//we can only read from system properties
String sysPropString = System.getProperty(QUARKUS_TEST_HANG_DETECTION_TIMEOUT);
if (sysPropString != null) {
time = sysPropString;
}
hangTimeout = new DurationConverter().convert(time);
hangTaskKey = hangDetectionExecutor.schedule(hangDetectionTask, hangTimeout.toMillis(), TimeUnit.MILLISECONDS);

quarkusTestProfile = profile;
Closeable testResourceManager = null;
try {
Expand Down Expand Up @@ -258,6 +318,14 @@ private ExtensionState doJavaStart(ExtensionContext context, Class<? extends Qua

runningQuarkusApplication = startupAction.run();

//now we have full config reset the hang timer

if (hangTaskKey != null) {
hangTaskKey.cancel(false);
hangTimeout = runningQuarkusApplication.getConfigValue(QUARKUS_TEST_HANG_DETECTION_TIMEOUT, Duration.class)
.orElse(Duration.of(10, ChronoUnit.MINUTES));
hangTaskKey = hangDetectionExecutor.schedule(hangDetectionTask, hangTimeout.toMillis(), TimeUnit.MILLISECONDS);
}
ConfigProviderResolver.setInstance(new RunningAppConfigResolver(runningQuarkusApplication));

System.setProperty("test.url", TestHTTPResourceManager.getUri(runningQuarkusApplication));
Expand All @@ -276,15 +344,19 @@ public void close() throws IOException {
shutdownTasks.pop().run();
}
} finally {
for (Map.Entry<String, String> entry : sysPropRestore.entrySet()) {
String val = entry.getValue();
if (val == null) {
System.clearProperty(entry.getKey());
} else {
System.setProperty(entry.getKey(), val);
try {
for (Map.Entry<String, String> entry : sysPropRestore.entrySet()) {
String val = entry.getValue();
if (val == null) {
System.clearProperty(entry.getKey());
} else {
System.setProperty(entry.getKey(), val);
}
}
tm.close();
} finally {
hangDetectionExecutor.shutdown();
}
tm.close();
}
try {
TestClassIndexer.removeIndex(requiredTestClass);
Expand Down Expand Up @@ -391,6 +463,7 @@ public void beforeEach(ExtensionContext context) throws Exception {
if (isNativeTest()) {
return;
}
resetHangTimeout();
if (!failedBoot) {
ClassLoader original = setCCL(runningQuarkusApplication.getClassLoader());
try {
Expand Down Expand Up @@ -459,6 +532,7 @@ public void afterEach(ExtensionContext context) throws Exception {
if (isNativeTest()) {
return;
}
resetHangTimeout();
if (!failedBoot) {
popMockContext();
ClassLoader original = setCCL(runningQuarkusApplication.getClassLoader());
Expand Down Expand Up @@ -586,6 +660,7 @@ public void beforeAll(ExtensionContext context) throws Exception {
if (isNativeTest()) {
return;
}
resetHangTimeout();
ensureStarted(context);
if (runningQuarkusApplication != null) {
pushMockContext();
Expand Down Expand Up @@ -625,6 +700,7 @@ public void interceptBeforeAllMethod(Invocation<Void> invocation, ReflectiveInvo
invocation.proceed();
return;
}
resetHangTimeout();
ensureStarted(extensionContext);
if (failedBoot) {
throwBootFailureException();
Expand All @@ -640,6 +716,7 @@ public <T> T interceptTestClassConstructor(Invocation<T> invocation,
if (isNativeTest()) {
return invocation.proceed();
}
resetHangTimeout();
ExtensionState state = ensureStarted(extensionContext);
if (failedBoot) {
throwBootFailureException();
Expand Down Expand Up @@ -796,6 +873,7 @@ public void interceptAfterAllMethod(Invocation<Void> invocation, ReflectiveInvoc

private Object runExtensionMethod(ReflectiveInvocationContext<Method> invocationContext, ExtensionContext extensionContext)
throws Throwable {
resetHangTimeout();
Method newMethod = null;

ClassLoader old = setCCL(runningQuarkusApplication.getClassLoader());
Expand Down Expand Up @@ -850,6 +928,7 @@ private Object runExtensionMethod(ReflectiveInvocationContext<Method> invocation

@Override
public void afterAll(ExtensionContext context) throws Exception {
resetHangTimeout();
try {
if (!isNativeTest() && (runningQuarkusApplication != null)) {
popMockContext();
Expand Down Expand Up @@ -922,6 +1001,7 @@ public void run() {

@Override
public void close() {
resetHangTimeout();
if (closed.compareAndSet(false, true)) {
ClassLoader old = Thread.currentThread().getContextClassLoader();
if (runningQuarkusApplication != null) {
Expand Down Expand Up @@ -1045,4 +1125,11 @@ public void execute(BuildContext context) {
return allCustomizers;
}
}

private static void resetHangTimeout() {
if (hangTaskKey != null) {
hangTaskKey.cancel(false);
hangTaskKey = hangDetectionExecutor.schedule(hangDetectionTask, hangTimeout.toMillis(), TimeUnit.MILLISECONDS);
}
}
}