diff --git a/README.md b/README.md index 085057e2..50654f4e 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ settings unexpectedly. * [Changelog](CHANGELOG.md) * [Global Security Configuration](docs/security.md) * [Logging](docs/logging.md) +* [Prometheus](docs/prometheus.md) * [Proxy Configuration](docs/proxy.md) ## Available options @@ -70,6 +71,7 @@ Name | Description `-passwordEnvVariable VAL` | Environment variable containing the Jenkins user API token or password. `-passwordFile VAL` | File containing the Jenkins user API token or password. `-pidFile VAL` | File to write PID to. The client will refuse to start if this file exists and the previous process is still running. +`-prometheusPort N` | If defined, then start an HTTP service on this port for Prometheus metrics. (default: -1) `-retry N` | Number of retries before giving up. Unlimited if not specified. (default: -1) `-retryBackOffStrategy RETRY_BACK_OFF_STRATEGY` | The mode controlling retry wait time. Can be either 'none' (use same interval between retries) or 'linear' (increase wait time before each retry up to maxRetryInterval) or 'exponential' (double wait interval on each retry up to maxRetryInterval). Default is 'none'. (default: NONE) `-retryInterval N` | Time to wait before retry in seconds. Default is 10 seconds. 
(default: 10) diff --git a/client/pom.xml b/client/pom.xml index 55c18590..7aaf1736 100644 --- a/client/pom.xml +++ b/client/pom.xml @@ -23,6 +23,7 @@ 8 + 1.6.0 UTF-8 UTF-8 UTF-8 @@ -166,5 +167,15 @@ oshi-core 5.3.2 + + io.micrometer + micrometer-core + ${micrometer.version} + + + io.micrometer + micrometer-registry-prometheus + ${micrometer.version} + diff --git a/client/src/main/java/hudson/plugins/swarm/Options.java b/client/src/main/java/hudson/plugins/swarm/Options.java index 6047f775..415e3354 100644 --- a/client/src/main/java/hudson/plugins/swarm/Options.java +++ b/client/src/main/java/hudson/plugins/swarm/Options.java @@ -203,4 +203,9 @@ public class Options { + " missing.", forbids = "-disableWorkDir") public boolean failIfWorkDirIsMissing = false; + + @Option( + name = "-prometheusPort", + usage = "If defined, then start an HTTP service on this port for Prometheus metrics.") + public int prometheusPort = -1; } diff --git a/client/src/main/java/hudson/plugins/swarm/SwarmClient.java b/client/src/main/java/hudson/plugins/swarm/SwarmClient.java index b780b791..34c290fe 100644 --- a/client/src/main/java/hudson/plugins/swarm/SwarmClient.java +++ b/client/src/main/java/hudson/plugins/swarm/SwarmClient.java @@ -1,10 +1,23 @@ package hudson.plugins.swarm; +import com.sun.net.httpserver.HttpServer; + import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import hudson.remoting.Launcher; import hudson.remoting.jnlp.Main; +import io.micrometer.core.instrument.binder.jvm.ClassLoaderMetrics; +import io.micrometer.core.instrument.binder.jvm.JvmGcMetrics; +import io.micrometer.core.instrument.binder.jvm.JvmHeapPressureMetrics; +import io.micrometer.core.instrument.binder.jvm.JvmMemoryMetrics; +import io.micrometer.core.instrument.binder.jvm.JvmThreadMetrics; +import io.micrometer.core.instrument.binder.system.FileDescriptorMetrics; +import io.micrometer.core.instrument.binder.system.ProcessorMetrics; +import io.micrometer.core.instrument.binder.system.UptimeMetrics; 
+import io.micrometer.prometheus.PrometheusConfig; +import io.micrometer.prometheus.PrometheusMeterRegistry; + import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.lang.StringUtils; import org.apache.hc.client5.http.auth.AuthCache; @@ -37,11 +50,13 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.io.OutputStream; import java.io.UncheckedIOException; import java.io.UnsupportedEncodingException; import java.net.Inet4Address; import java.net.Inet6Address; import java.net.InetAddress; +import java.net.InetSocketAddress; import java.net.MalformedURLException; import java.net.NetworkInterface; import java.net.SocketException; @@ -79,6 +94,7 @@ public class SwarmClient { private final Options options; private final String hash; private String name; + private HttpServer prometheusServer = null; public SwarmClient(Options options) { this.options = options; @@ -108,6 +124,10 @@ public SwarmClient(Options options) { "Problem reading labels from file " + options.labelsFile, e); } } + + if (options.prometheusPort > 0) { + startPrometheusService(options.prometheusPort); + } } public String getName() { @@ -653,6 +673,9 @@ private static String hash(File remoteFsRoot) { } public void exitWithStatus(int status) { + if (prometheusServer != null) { + prometheusServer.stop(1); + } System.exit(status); } @@ -660,6 +683,41 @@ public void sleepSeconds(int waitTime) throws InterruptedException { Thread.sleep(waitTime * 1000); } + private void startPrometheusService(int port) { + logger.fine("Starting Prometheus service on port " + port); + PrometheusMeterRegistry prometheusRegistry = + new PrometheusMeterRegistry(PrometheusConfig.DEFAULT); + // Add some standard metrics to the registry + new ClassLoaderMetrics().bindTo(prometheusRegistry); + new FileDescriptorMetrics().bindTo(prometheusRegistry); + new JvmGcMetrics().bindTo(prometheusRegistry); + new JvmHeapPressureMetrics().bindTo(prometheusRegistry); + new 
JvmMemoryMetrics().bindTo(prometheusRegistry); + new JvmThreadMetrics().bindTo(prometheusRegistry); + new ProcessorMetrics().bindTo(prometheusRegistry); + new UptimeMetrics().bindTo(prometheusRegistry); + + try { + prometheusServer = HttpServer.create(new InetSocketAddress(port), 0); + prometheusServer.createContext( + "/prometheus", + httpExchange -> { + String response = prometheusRegistry.scrape(); + byte[] responseContent = response.getBytes(StandardCharsets.UTF_8); + httpExchange.sendResponseHeaders(200, responseContent.length); + try (OutputStream os = httpExchange.getResponseBody()) { + os.write(responseContent); + } + }); + + new Thread(prometheusServer::start).start(); + } catch (IOException e) { + logger.severe("Failed to start Prometheus service: " + e.getMessage()); + throw new UncheckedIOException(e); + } + logger.info("Started Prometheus service on port " + port); + } + private static class DefaultTrustManager implements X509TrustManager { final List allowedFingerprints = new ArrayList<>(); diff --git a/docs/prometheus.md b/docs/prometheus.md new file mode 100644 index 00000000..58fa719b --- /dev/null +++ b/docs/prometheus.md @@ -0,0 +1,26 @@ +# Prometheus monitoring + +The Jenkins Swarm Client has support for [Prometheus](https://prometheus.io) monitoring, which allows a Prometheus server to scrape +data from the client. To start a Prometheus endpoint, simply use a positive value for the `-prometheusPort` +option when starting the client JAR. The service will be stopped when the Swarm Client exits. + +The actual metrics can be accessed on the `/prometheus` endpoint. So for example, if the node's IP address is +`169.254.10.12`, and `9100` is passed to `-prometheusPort`, then the metrics can be accessed at: +`http://169.254.10.12:9100/prometheus`. 
+ +## Data Reported + +The client reports metrics for: + +- Basic process info, including: + - Process uptime + - CPU time consumed + - Virtual memory consumed + - Resident memory consumed + - File descriptors consumed +- JVM metrics such as: + - CPU usage + - Memory usage + - Thread states + - Garbage collection statistics + - Class loader statistics diff --git a/plugin/src/test/java/hudson/plugins/swarm/SwarmClientIntegrationTest.java b/plugin/src/test/java/hudson/plugins/swarm/SwarmClientIntegrationTest.java index dad08ee5..5dbdfc35 100644 --- a/plugin/src/test/java/hudson/plugins/swarm/SwarmClientIntegrationTest.java +++ b/plugin/src/test/java/hudson/plugins/swarm/SwarmClientIntegrationTest.java @@ -34,8 +34,11 @@ import oshi.software.os.OSProcess; import oshi.software.os.OperatingSystem; +import java.io.BufferedReader; import java.io.File; import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; import java.io.Writer; import java.net.URL; import java.nio.charset.StandardCharsets; @@ -471,6 +474,26 @@ public void jarCacheWithCustomPath() throws Exception { jarCachePath); } + @Test + public void metricsPrometheus() throws Exception { + swarmClientRule.createSwarmClient("-prometheusPort", "9999"); + + // Fetch the metrics page from the client + StringBuilder content = new StringBuilder(); + try (InputStream is = new URL("http://localhost:9999/prometheus").openStream(); + BufferedReader reader = new BufferedReader(new InputStreamReader(is))) { + String inputLine; + while ((inputLine = reader.readLine()) != null) { + content.append(inputLine); + } + } + + // Assert that a non-zero length string was read + assertTrue(content.length() > 0); + // Assert that we got at least one known Prometheus metric + assertTrue(content.toString().contains("process_cpu_usage")); + } + @After public void tearDown() throws IOException { Files.deleteIfExists(getPidFile());