From c48ccf5171a91461eaacba34a0f0d7af3a06b3c1 Mon Sep 17 00:00:00 2001 From: Roman Marchenko Date: Mon, 3 Jul 2023 17:25:26 +0000 Subject: [PATCH] PID adjustment on checkpoint Reviewed-by: akozlov --- src/hotspot/share/runtime/globals.hpp | 4 + src/java.base/share/man/java.1 | 5 + src/java.base/share/native/launcher/main.c | 107 ++++++++++++++- .../jdk/crac/ContainerPidAdjustmentTest.java | 125 ++++++++++++++++++ .../java/net/InetAddress/ResolveTest.java | 2 + test/lib/jdk/test/lib/crac/CracBuilder.java | 99 +++++++++++--- 6 files changed, 320 insertions(+), 22 deletions(-) create mode 100644 test/jdk/jdk/crac/ContainerPidAdjustmentTest.java diff --git a/src/hotspot/share/runtime/globals.hpp b/src/hotspot/share/runtime/globals.hpp index e21a7bdf51d..7ecec38742f 100644 --- a/src/hotspot/share/runtime/globals.hpp +++ b/src/hotspot/share/runtime/globals.hpp @@ -2097,6 +2097,10 @@ const intx ObjectAlignmentInBytes = 8; product(ccstr, CRaCRestoreFrom, NULL, RESTORE_SETTABLE, \ "Path to image for restore, replaces the initializing VM on success") \ \ + product(uint, CRaCMinPid, 128, \ + "Mininal PID value for checkpoint'ed process") \ + range(1, UINT_MAX) \ + \ product(ccstr, CREngine, "criuengine", RESTORE_SETTABLE, \ "Path or name of a program implementing checkpoint/restore and " \ "optional extra parameters as a comma-separated list: " \ diff --git a/src/java.base/share/man/java.1 b/src/java.base/share/man/java.1 index 9530cbd5e8f..206351b22f4 100644 --- a/src/java.base/share/man/java.1 +++ b/src/java.base/share/man/java.1 @@ -1325,6 +1325,11 @@ Restores a snapshot created by .RS .RE .TP +.B \f[CB]\-XX:CRaCMinPid=\f[R]value\f[R] +A desired minimal PID value for checkpoint'ed process. Ignored on restore. +.RS +.RE +.TP .B \f[CB]\-XX:ErrorFile=\f[R]\f[I]filename\f[R] Specifies the path and file name to which error data is written when an irrecoverable error occurs. diff --git a/src/java.base/share/native/launcher/main.c b/src/java.base/share/native/launcher/main.c index ebbc9134f96..22e02f9ba67 100644 --- a/src/java.base/share/native/launcher/main.c +++ b/src/java.base/share/native/launcher/main.c @@ -34,6 +34,13 @@ #include "jli_util.h" #include "jni.h" +#ifndef WIN32 +#include +#endif +#ifdef LINUX +#include +#endif + #ifdef _MSC_VER #if _MSC_VER > 1400 && _MSC_VER < 1600 @@ -97,6 +104,9 @@ WinMain(HINSTANCE inst, HINSTANCE previnst, LPSTR cmdline, int cmdshow) #include static int is_checkpoint = 0; +static const int crac_min_pid_default = 128; +static int crac_min_pid = 0; +static int is_min_pid_set = 0; static void parse_checkpoint(const char *arg) { if (!is_checkpoint) { @@ -106,6 +116,14 @@ static void parse_checkpoint(const char *arg) { is_checkpoint = 1; } } + if (!is_min_pid_set) { + const char *checkpoint_arg = "-XX:CRaCMinPid="; + const int len = strlen(checkpoint_arg); + if (0 == strncmp(arg, checkpoint_arg, len)) { + crac_min_pid = atoi(arg + len); + is_min_pid_set = 1; + } + } } static pid_t g_child_pid = -1; @@ -167,6 +185,61 @@ static void setup_sighandler() { } } +static int set_last_pid(int pid) { +#ifdef LINUX + char buf[11]; // enough for int32 + const int len = snprintf(buf, sizeof(buf), "%d", pid); + if (0 > len || sizeof(buf) < (size_t)len) { + return EINVAL; + } + const char *last_pid_filename = "/proc/sys/kernel/ns_last_pid"; + const int last_pid_file = open(last_pid_filename, O_WRONLY|O_TRUNC, 0666); + if (0 > last_pid_file) { + return errno; + } + int res = 0; + if (len > write(last_pid_file, buf, len)) { + res = errno; + } + close(last_pid_file); + return res; +#else + return EPERM; +#endif +} + +static void spin_last_pid(int pid) { + const int MaxSpinCount = pid < 1000 ? 1000 : pid; + int cnt = MaxSpinCount; + int child = 0; + int prev = 0; + do { + child = fork(); + if (0 > child) { + perror("spin_last_pid clone"); + exit(1); + } + if (0 == child) { + exit(0); + } + if (child < prev) { + fprintf(stderr, "%s: Invalid argument (%d)\n", __FUNCTION__, pid); + exit(1); + } + if (0 >= cnt) { + fprintf(stderr, "%s: Can't reach pid %d, out of try count. Current pid=%d\n", __FUNCTION__, pid, child); + exit(1); + } + prev = child; + int status; + if (0 > waitpid(child, &status, 0)) { + perror("spin_last_pid waitpid"); + exit(1); + } + --cnt; + } while (child < pid); +} + JNIEXPORT int main(int argc, char **argv) { @@ -278,10 +351,38 @@ main(int argc, char **argv) margv = args->elements; } - // Avoid unexpected process completion when checkpointing under docker container run - // by creating the main process waiting for children before exit. - if (is_checkpoint && 1 == getpid()) { + const int is_init = 1 == getpid(); + if (is_init && !is_min_pid_set) { + crac_min_pid = crac_min_pid_default; + } + const int needs_pid_adjust = getpid() < crac_min_pid; + if (is_checkpoint && (is_init || needs_pid_adjust)) { + // Move PID value for new processes to a desired value + // to avoid PID conflicts on restore. + if (needs_pid_adjust) { + const int res = set_last_pid(crac_min_pid); + if (EPERM == res || EACCES == res || EROFS == res) { + spin_last_pid(crac_min_pid); + } else if (0 != res) { + fprintf(stderr, "set_last_pid: %s\n", strerror(res)); + exit(1); + } + } + + // Avoid unexpected process completion when checkpointing under docker container run + // by creating the main process waiting for children before exit. g_child_pid = fork(); + if (0 == g_child_pid && needs_pid_adjust && getpid() < crac_min_pid) { + if (is_min_pid_set) { + fprintf(stderr, "Error: Can't adjust PID to min PID %d, current PID %d\n", crac_min_pid, (int)getpid()); + exit(1); + } else { + fprintf(stderr, + "Warning: Can't adjust PID to min PID %d, current PID %d.\n" + "This message can be suppressed by '-XX:CRaCMinPid=1' option\n", + crac_min_pid, (int)getpid()); + } + } if (0 < g_child_pid) { // The main process should forward signals to the child. setup_sighandler(); diff --git a/test/jdk/jdk/crac/ContainerPidAdjustmentTest.java b/test/jdk/jdk/crac/ContainerPidAdjustmentTest.java new file mode 100644 index 00000000000..c6e7375696a --- /dev/null +++ b/test/jdk/jdk/crac/ContainerPidAdjustmentTest.java @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2023, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Azul Systems, 385 Moffett Park Drive, Suite 115, Sunnyvale + * CA 94089 USA or visit www.azul.com if you need additional information or + * have any questions. + */ + +import jdk.test.lib.containers.docker.DockerTestUtils; +import jdk.test.lib.crac.CracBuilder; +import jdk.test.lib.crac.CracProcess; +import jdk.test.lib.crac.CracTest; +import jdk.test.lib.crac.CracTestArg; +import jdk.test.lib.process.OutputAnalyzer; + +import static jdk.test.lib.Asserts.assertEquals; +import static jdk.test.lib.Asserts.assertLessThan; + +import java.util.Arrays; + +/* + * @test ContainerPidAdjustmentTest + * @summary The test checks that process PID is adjusted with the specified value, when checkpointing in a container. Default min PID value is 128. + * @requires (os.family == "linux") + * @library /test/lib + * @build ContainerPidAdjustmentTest + * @run driver/timeout=60 jdk.test.lib.crac.CracTest true false INF true 128 + * @run driver/timeout=60 jdk.test.lib.crac.CracTest true true 1 false 1 + * @run driver/timeout=60 jdk.test.lib.crac.CracTest true true 100 true 100 + * @run driver/timeout=60 jdk.test.lib.crac.CracTest false false INF false 1 + * @run driver/timeout=60 jdk.test.lib.crac.CracTest false true 1 false 1 + * @run driver/timeout=60 jdk.test.lib.crac.CracTest false true 1 true 1 + * @run driver/timeout=60 jdk.test.lib.crac.CracTest false true 200 true 200 + * @run driver/timeout=60 jdk.test.lib.crac.CracTest false true 1000 false 1000 + * @run driver/timeout=60 jdk.test.lib.crac.CracTest false true 2000 true 2000 1000 + * @run driver/timeout=60 jdk.test.lib.crac.CracTest false true 0 true -1 + * @run driver/timeout=60 jdk.test.lib.crac.CracTest false true -10 true -1 + * @run driver/timeout=60 jdk.test.lib.crac.CracTest false true blabla true -1 + * @run driver/timeout=60 jdk.test.lib.crac.CracTest false true 5000000 true -1 + * @run driver/timeout=60 jdk.test.lib.crac.CracTest false true 5000000 true -1 4194200 + * @run driver/timeout=60 jdk.test.lib.crac.CracTest false true 4194303 true -1 + + */ +public class ContainerPidAdjustmentTest implements CracTest { + @CracTestArg(0) + boolean runDirectly; + + @CracTestArg(1) + boolean needSetMinPid; + + @CracTestArg(2) + String lastPid; + + @CracTestArg(3) + boolean usePrivilegedContainer; + + @CracTestArg(4) + long expectedLastPid; + + @CracTestArg(value = 5, optional = true) + String lastPidSetup; + + final private String CURRENT_PID_MESSAGE = "Current PID = "; + + @Override + public void test() throws Exception { + if (!DockerTestUtils.canTestDocker()) { + return; + } + CracBuilder builder = new CracBuilder() + .inDockerImage("pid-adjustment") + .runContainerDirectly(runDirectly) + .containerUsePrivileged(usePrivilegedContainer); + if (needSetMinPid) { + builder.vmOption("-XX:CRaCMinPid=" + lastPid); + } + if (0 > expectedLastPid) { + builder.captureOutput(true); + } + if (null != lastPidSetup) { + // Set up the initial last pid, + // create a non-privileged user, + // and force spinning the last pid running checkpoint under the user. + builder + .containerSetup(Arrays.asList("bash", "-c", "useradd the_user && echo " + lastPidSetup + " >/proc/sys/kernel/ns_last_pid")) + .dockerCheckpointOptions(Arrays.asList("-u", "the_user")); + } + + if (0 < expectedLastPid) { + builder.startCheckpoint().waitForSuccess(); + } else { + final int expectedExitValue = (int)java.lang.Math.abs(expectedLastPid); + CracProcess process = builder.startCheckpoint(); + final int exitValue = process.waitFor(); + assertEquals(expectedExitValue, exitValue, "Process returned unexpected exit code: " + exitValue); + OutputAnalyzer oa = process.outputAnalyzer(); + oa.shouldNotContain(CURRENT_PID_MESSAGE); + if (null != lastPidSetup) { + oa.shouldContain("spin_last_pid: Invalid argument (" + lastPid + ")"); + } + } + } + + @Override + public void exec() throws Exception { + System.out.println(CURRENT_PID_MESSAGE + ProcessHandle.current().pid()); + assertLessThan((long)0, expectedLastPid, "Shouldn't happen"); + assertLessThan(expectedLastPid, ProcessHandle.current().pid()); + } +} diff --git a/test/jdk/jdk/crac/java/net/InetAddress/ResolveTest.java b/test/jdk/jdk/crac/java/net/InetAddress/ResolveTest.java index 921f3a5478b..92b2219b8d6 100644 --- a/test/jdk/jdk/crac/java/net/InetAddress/ResolveTest.java +++ b/test/jdk/jdk/crac/java/net/InetAddress/ResolveTest.java @@ -65,6 +65,7 @@ public void test() throws Exception { try { CompletableFuture firstOutputFuture = new CompletableFuture(); + builder.vmOption("-XX:CRaCMinPid=100"); CracProcess checkpointed = builder.startCheckpoint().watch(line -> { System.out.println("OUTPUT: " + line); if (line.equals("192.168.12.34")) { @@ -78,6 +79,7 @@ public void test() throws Exception { builder.checkpointViaJcmd(); checkpointed.waitForCheckpointed(); + builder.clearVmOptions(); builder.recreateContainer(imageName, "--add-host", TEST_HOSTNAME + ":192.168.56.78", "--volume", Utils.TEST_CLASSES + ":/second-run"); // any file/dir suffices diff --git a/test/lib/jdk/test/lib/crac/CracBuilder.java b/test/lib/jdk/test/lib/crac/CracBuilder.java index c571a192bde..90cdd97d0b5 100644 --- a/test/lib/jdk/test/lib/crac/CracBuilder.java +++ b/test/lib/jdk/test/lib/crac/CracBuilder.java @@ -46,6 +46,10 @@ public class CracBuilder { String dockerImageBaseVersion; String dockerImageName; private String[] dockerOptions; + private List dockerCheckpointOptions; + boolean containerUsePrivileged = true; + private List containerSetupCommand; + boolean runContainerDirectly = false; // make sure to update copy() when adding another field here boolean containerStarted; @@ -83,6 +87,10 @@ public CracBuilder copy() { other.captureOutput = captureOutput; other.dockerImageName = dockerImageName; other.dockerOptions = dockerOptions == null ? null : Arrays.copyOf(dockerOptions, dockerOptions.length); + other.dockerCheckpointOptions = dockerCheckpointOptions; + other.containerUsePrivileged = containerUsePrivileged; + other.containerSetupCommand = containerSetupCommand; + other.runContainerDirectly = runContainerDirectly; return other; } @@ -96,6 +104,26 @@ public CracBuilder debug(boolean debug) { return this; } + public CracBuilder dockerCheckpointOptions(List options) { + this.dockerCheckpointOptions = options; + return this; + } + + public CracBuilder containerSetup(List cmd) { + this.containerSetupCommand = cmd; + return this; + } + + public CracBuilder containerUsePrivileged(boolean usePrivileged) { + this.containerUsePrivileged = usePrivileged; + return this; + } + + public CracBuilder runContainerDirectly(boolean runDirectly) { + this.runContainerDirectly = runDirectly; + return this; + } + public CracBuilder classpathEntry(String cp) { classpathEntries.add(cp); return this; @@ -197,7 +225,11 @@ public CracProcess startCheckpoint(String... javaPrefix) throws Exception { } public CracProcess startCheckpoint(List javaPrefix) throws Exception { - ensureContainerStarted(); + if (runContainerDirectly) { + prepareContainer(); + } else { + ensureContainerStarted(); + } List cmd = prepareCommand(javaPrefix, false); cmd.add("-XX:CRaCCheckpointTo=" + imageDir); cmd.add(main().getName()); @@ -225,18 +257,36 @@ public void ensureContainerStarted() throws Exception { fail("CRAC_CRIU_PATH is not set and cannot find criu executable in any of: " + CRIU_CANDIDATES); } if (!containerStarted) { - ensureContainerKilled(); - buildDockerImage(); - FileUtils.deleteFileTreeWithRetry(Path.of(".", "jdk-docker")); - // Make sure we start with a clean image directory - DockerTestUtils.execute(Container.ENGINE_COMMAND, "volume", "rm", "cr"); + prepareContainer(); List cmd = prepareContainerCommand(dockerImageName, dockerOptions); log("Starting docker container:\n" + String.join(" ", cmd)); assertEquals(0, new ProcessBuilder().inheritIO().command(cmd).start().waitFor()); + containerSetup(); containerStarted = true; } } + private void prepareContainer() throws Exception { + if (runContainerDirectly && null != containerSetupCommand) { + fail("runContainerDirectly and containerSetupCommand cannot be used together."); + } + ensureContainerKilled(); + buildDockerImage(); + FileUtils.deleteFileTreeWithRetry(Path.of(".", "jdk-docker")); + // Make sure we start with a clean image directory + DockerTestUtils.execute(Container.ENGINE_COMMAND, "volume", "rm", "cr"); + } + + private void containerSetup() throws Exception { + if (null != containerSetupCommand && 0 < containerSetupCommand.size()) { + List cmd = new ArrayList<>(); + cmd.addAll(Arrays.asList(Container.ENGINE_COMMAND, "exec", CONTAINER_NAME)); + cmd.addAll(containerSetupCommand); + log("Container set up:\n" + String.join(" ", cmd)); + DockerTestUtils.execute(cmd).shouldHaveExitValue(0); + } + } + private void buildDockerImage() throws Exception { String previousBaseImageName = null; String previousBaseImageVersion = null; @@ -264,12 +314,17 @@ private void buildDockerImage() throws Exception { } } - private List prepareContainerCommand(String imageName, String[] options) { + private List prepareContainerCommandBase(String imageName, String[] options) { List cmd = new ArrayList<>(); cmd.add(Container.ENGINE_COMMAND); - cmd.addAll(Arrays.asList("run", "--rm", "-d")); - cmd.add("--privileged"); // required to give CRIU sufficient permissions - cmd.add("--init"); // otherwise the checkpointed process would not be reaped (by sleep with PID 1) + cmd.addAll(Arrays.asList("run", "--rm")); + if (!runContainerDirectly) { + cmd.add("-d"); + cmd.add("--init"); // otherwise the checkpointed process would not be reaped (by sleep with PID 1) + } + if (containerUsePrivileged) { + cmd.add("--privileged"); // required to give CRIU sufficient permissions + } int entryCounter = 0; for (var entry : Utils.TEST_CLASS_PATH.split(File.pathSeparator)) { cmd.addAll(Arrays.asList("--volume", entry + ":/cp/" + (entryCounter++))); @@ -285,6 +340,11 @@ private List prepareContainerCommand(String imageName, String[] options) cmd.addAll(Arrays.asList(options)); } cmd.add(imageName); + return cmd; + } + + private List prepareContainerCommand(String imageName, String[] options) { + List cmd = prepareContainerCommandBase(imageName, options); cmd.addAll(Arrays.asList("sleep", "3600")); return cmd; } @@ -296,18 +356,10 @@ public void ensureContainerKilled() throws Exception { public void recreateContainer(String imageName, String... options) throws Exception { assertTrue(containerStarted); - String minPid = DockerTestUtils.execute(Container.ENGINE_COMMAND, "exec", CONTAINER_NAME, - "cat", "/proc/sys/kernel/ns_last_pid").getStdout().trim(); DockerTestUtils.execute(Container.ENGINE_COMMAND, "kill", CONTAINER_NAME).getExitValue(); List cmd = prepareContainerCommand(imageName, options); log("Recreating docker container:\n" + String.join(" ", cmd)); assertEquals(0, new ProcessBuilder().inheritIO().command(cmd).start().waitFor()); - // We need to cycle PIDs; had we tried to restore right away the exec would get the - // same PIDs and restore would fail. - log("Cycling PIDs until %s%n", minPid); - DockerTestUtils.execute(Container.ENGINE_COMMAND, "exec", - CONTAINER_NAME, "bash", "-c", - "while [ $(cat /proc/sys/kernel/ns_last_pid) -le " + minPid + " ]; do cat /dev/null; done"); } public CracProcess doRestore(String... javaPrefix) throws Exception { @@ -369,7 +421,16 @@ private List prepareCommand(List javaPrefix, boolean isRestore) if (javaPrefix != null) { cmd.addAll(javaPrefix); } else if (dockerImageName != null) { - cmd.addAll(Arrays.asList(Container.ENGINE_COMMAND, "exec", CONTAINER_NAME)); + if (runContainerDirectly) { + cmd = prepareContainerCommandBase(dockerImageName, dockerOptions); + } else { + cmd.add(Container.ENGINE_COMMAND); + cmd.add("exec"); + if (null != dockerCheckpointOptions) { + cmd.addAll(dockerCheckpointOptions); + } + cmd.add(CONTAINER_NAME); + } cmd.add(DOCKER_JAVA); } else { cmd.add(JAVA);