Skip to content

Commit

Permalink
Correct System.nanotime() value after restore
Browse files Browse the repository at this point in the history
Reviewed-by: akozlov
  • Loading branch information
Radim Vansa committed Jun 19, 2023
1 parent e75dbb2 commit a8e69de
Show file tree
Hide file tree
Showing 7 changed files with 312 additions and 27 deletions.
63 changes: 53 additions & 10 deletions src/hotspot/os/linux/os_linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ class CracRestoreParameters : public CHeapObj<mtInternal> {

struct header {
jlong _restore_time;
jlong _restore_counter;
jlong _restore_nanos;
int _nflags;
int _nprops;
int _env_memory_size;
Expand Down Expand Up @@ -324,10 +324,10 @@ class CracRestoreParameters : public CHeapObj<mtInternal> {
const SystemProperty* props,
const char *args,
jlong restore_time,
jlong restore_counter) {
jlong restore_nanos) {
header hdr = {
restore_time,
restore_counter,
restore_nanos,
num_flags,
system_props_length(props),
env_vars_size(environ)
Expand Down Expand Up @@ -440,7 +440,7 @@ static char* _crengine_arg_str = NULL;
static unsigned int _crengine_argc = 0;
static const char* _crengine_args[32];
static jlong _restore_start_time;
static jlong _restore_start_counter;
static jlong _restore_start_nanos;
static FdsInfo _vm_inited_fds(false);

// If the VM might have been created on the primordial thread, we need to resolve the
Expand Down Expand Up @@ -5710,10 +5710,10 @@ jlong os::Linux::restore_start_time() {
}

jlong os::Linux::uptime_since_restore() {
if (!_restore_start_counter) {
if (!_restore_start_nanos) {
return -1;
}
return javaTimeNanos() - _restore_start_counter;
return javaTimeNanos() - _restore_start_nanos;
}

void VM_Crac::trace_cr(const char* msg, ...) {
Expand Down Expand Up @@ -6004,6 +6004,7 @@ class CracSHM {
};

static int checkpoint_restore(int *shmid) {
os::record_time_before_checkpoint();

int cres = call_crengine();
if (cres < 0) {
Expand All @@ -6021,6 +6022,8 @@ static int checkpoint_restore(int *shmid) {
} while (sig == -1 && errno == EINTR);
assert(sig == RESTORE_SIGNAL, "got what requested");

os::update_javaTimeNanos_offset();

if (CRTraceStartupTime) {
tty->print_cr("STARTUPTIME " JLONG_FORMAT " restore-native", os::javaTimeNanos());
}
Expand Down Expand Up @@ -6192,7 +6195,9 @@ void VM_Crac::doit() {

if (shmid <= 0 || !VM_Crac::read_shm(shmid)) {
_restore_start_time = os::javaTimeMillis();
_restore_start_counter = os::javaTimeNanos();
_restore_start_nanos = os::javaTimeNanos();
} else {
_restore_start_nanos += os::monotonic_time_offset();
}
PerfMemoryLinux::restore();

Expand Down Expand Up @@ -6296,7 +6301,7 @@ void os::Linux::restore() {
struct stat st;

jlong restore_time = javaTimeMillis();
jlong restore_counter = javaTimeNanos();
jlong restore_nanos = javaTimeNanos();

compute_crengine();

Expand All @@ -6310,7 +6315,7 @@ void os::Linux::restore() {
Arguments::system_properties(),
Arguments::java_command() ? Arguments::java_command() : "",
restore_time,
restore_counter)) {
restore_nanos)) {
char strid[32];
snprintf(strid, sizeof(strid), "%d", id);
setenv("CRAC_NEW_ARGS_ID", strid, true);
Expand Down Expand Up @@ -6412,7 +6417,7 @@ bool CracRestoreParameters::read_from(int fd) {
char* cursor = _raw_content + sizeof(header);

::_restore_start_time = hdr->_restore_time;
::_restore_start_counter = hdr->_restore_counter;
::_restore_start_nanos = hdr->_restore_nanos;

for (int i = 0; i < hdr->_nflags; i++) {
FormatBuffer<80> err_msg("%s", "");
Expand Down Expand Up @@ -6491,3 +6496,41 @@ void os::print_memory_mappings(char* addr, size_t bytes, outputStream* st) {
st->cr();
}
}

static bool read_all(int fd, char *dest, size_t n) {
size_t rd = 0;
do {
ssize_t r = ::read(fd, dest + rd, n - rd);
if (r == 0) {
return false;
} else if (r < 0) {
if (errno == EINTR) {
continue;
}
return false;
}
rd += r;
} while (rd < n);
return true;
}

bool os::read_bootid(char *dest) {
int fd = ::open("/proc/sys/kernel/random/boot_id", O_RDONLY);
if (fd < 0 || !read_all(fd, dest, UUID_LENGTH)) {
perror("CRaC: Cannot read system boot ID");
return false;
}
char c;
if (!read_all(fd, &c, 1) || c != '\n') {
perror("CRaC: system boot ID does not end with newline");
return false;
}
if (::read(fd, &c, 1) != 0) {
perror("CRaC: Unexpected data/error reading system boot ID");
return false;
}
if (::close(fd) != 0) {
perror("CRaC: Cannot close system boot ID file");
}
return true;
}
2 changes: 1 addition & 1 deletion src/hotspot/os/posix/os_posix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1410,7 +1410,7 @@ jlong os::javaTimeNanos() {
struct timespec tp;
int status = clock_gettime(CLOCK_MONOTONIC, &tp);
assert(status == 0, "clock_gettime error: %s", os::strerror(errno));
jlong result = jlong(tp.tv_sec) * NANOSECS_PER_SEC + jlong(tp.tv_nsec);
jlong result = jlong(tp.tv_sec) * NANOSECS_PER_SEC + jlong(tp.tv_nsec) + javaTimeNanos_offset;
return result;
}

Expand Down
44 changes: 44 additions & 0 deletions src/hotspot/share/runtime/os.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,14 @@ int os::_processor_count = 0;
int os::_initial_active_processor_count = 0;
os::PageSizes os::_page_sizes;

// Timestamps recorded before checkpoint
jlong os::checkpoint_millis;
jlong os::checkpoint_nanos;
char os::checkpoint_bootid[UUID_LENGTH];
// Value based on wall clock time difference that will guarantee monotonic
// System.nanoTime() close to actual wall-clock time difference.
jlong os::javaTimeNanos_offset = 0;

#ifndef PRODUCT
julong os::num_mallocs = 0; // # of calls to malloc/realloc
julong os::alloc_bytes = 0; // # of bytes allocated
Expand Down Expand Up @@ -2023,3 +2031,39 @@ void os::PageSizes::print_on(outputStream* st) const {
st->print("empty");
}
}

void os::record_time_before_checkpoint() {
checkpoint_millis = javaTimeMillis();
checkpoint_nanos = javaTimeNanos();
memset(checkpoint_bootid, 0, UUID_LENGTH);
read_bootid(checkpoint_bootid);
}

void os::update_javaTimeNanos_offset() {
char buf[UUID_LENGTH];
// We will change the nanotime offset only if this is not the same boot
// to prevent reducing the accuracy of System.nanoTime() unnecessarily.
// It is possible that in a real-world case the boot_id does not change
// (containers keep the boot_id) - but the monotonic time changes. We will
// only guarantee that the nanotime does not go backwards in that case but
// won't offset the time based on wall-clock time as this change in monotonic
// time is likely intentional.
if (!read_bootid(buf) || memcmp(buf, checkpoint_bootid, UUID_LENGTH) != 0) {
assert(checkpoint_millis >= 0, "Restore without a checkpoint?");
long diff_millis = javaTimeMillis() - checkpoint_millis;
// If the wall clock has gone backwards we won't add it to the offset
if (diff_millis < 0) {
diff_millis = 0;
}
// javaTimeNanos() call on the second line below uses the *_offset, so we will zero
// it to make the call return true monotonic time rather than the adjusted value.
javaTimeNanos_offset = 0;
javaTimeNanos_offset = checkpoint_nanos - javaTimeNanos() + diff_millis * 1000000L;
} else {
// ensure monotonicity even if this looks like the same boot
jlong diff = javaTimeNanos() - checkpoint_nanos;
if (diff < 0) {
javaTimeNanos_offset -= diff;
}
}
}
16 changes: 16 additions & 0 deletions src/hotspot/share/runtime/os.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ enum WXMode {
WXExec
};

// xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
#define UUID_LENGTH 36

// Executable parameter flag for os::commit_memory() and
// os::commit_memory_or_exit().
const bool ExecMem = true;
Expand Down Expand Up @@ -130,6 +133,11 @@ class os: AllStatic {
static address _polling_page;
static PageSizes _page_sizes;

static jlong checkpoint_millis;
static jlong checkpoint_nanos;
static char checkpoint_bootid[UUID_LENGTH];
static jlong javaTimeNanos_offset;

static char* pd_reserve_memory(size_t bytes, bool executable);

static char* pd_attempt_reserve_memory_at(char* addr, size_t bytes, bool executable);
Expand Down Expand Up @@ -174,6 +182,8 @@ class os: AllStatic {

LINUX_ONLY(static void pd_init_container_support();)

static bool read_bootid(char *dest);

public:
static void init(void); // Called before command line parsing

Expand All @@ -199,6 +209,12 @@ class os: AllStatic {
static void javaTimeSystemUTC(jlong &seconds, jlong &nanos);
static void run_periodic_checks();

static void record_time_before_checkpoint();
static void update_javaTimeNanos_offset();
static jlong monotonic_time_offset() {
return javaTimeNanos_offset;
}

// Returns the elapsed time in seconds since the vm started.
static double elapsedTime();

Expand Down
138 changes: 138 additions & 0 deletions test/jdk/jdk/crac/java/lang/System/NanoTimeTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
/*
* Copyright (c) 2023, Azul Systems, Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/

import jdk.crac.*;
import jdk.crac.management.CRaCMXBean;
import jdk.test.lib.Container;
import jdk.test.lib.containers.docker.Common;
import jdk.test.lib.containers.docker.DockerTestUtils;
import jdk.test.lib.crac.CracBuilder;
import jdk.test.lib.crac.CracTest;
import jdk.test.lib.crac.CracTestArg;

import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.lang.management.RuntimeMXBean;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.concurrent.TimeUnit;

import static jdk.test.lib.Asserts.*;

/**
* @test NanoTimeTest
* @requires (os.family == "linux")
* @requires docker.support
* @library /test/lib
* @build NanoTimeTest
* @run driver jdk.test.lib.crac.CracTest 0 true
* @run driver jdk.test.lib.crac.CracTest 86400 true
* @run driver jdk.test.lib.crac.CracTest -86400 true
* @run driver jdk.test.lib.crac.CracTest 86400 false
* @run driver jdk.test.lib.crac.CracTest -86400 false
*/
public class NanoTimeTest implements CracTest {
private static final String imageName = Common.imageName("system-nanotime");

@CracTestArg(0)
long monotonicOffset;

@CracTestArg(1)
boolean changeBootId;

@Override
public void test() throws Exception {
if (!DockerTestUtils.canTestDocker()) {
return;
}
CracBuilder builder = new CracBuilder();
Path bootIdFile = Files.createTempFile("NanoTimeTest-", "-boot_id");
try {
// TODO: use more official image
builder.withBaseImage("ghcr.io/crac/test-base", "latest")
.dockerOptions("-v", bootIdFile + ":/fake_boot_id")
.inDockerImage(imageName);

Files.writeString(bootIdFile, "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx\n");
// We need to preload the library before checkpoint
builder.doCheckpoint(Container.ENGINE_COMMAND, "exec",
"-e", "LD_PRELOAD=/opt/path-mapping-quiet.so",
"-e", "PATH_MAPPING=/proc/sys/kernel/random/boot_id:/fake_boot_id",
CracBuilder.CONTAINER_NAME,
// In case we are trying to use negative monotonic offset we could
// run into situation where we'd set it to negative value (prohibited).
// Therefore, we'll rather offset it to the future before checkpoint
// and set to 0 for restore.
"unshare", "--fork", "--time", "--monotonic", String.valueOf(Math.max(-monotonicOffset, 0)),
CracBuilder.DOCKER_JAVA);

if (changeBootId) {
Files.writeString(bootIdFile, "yyyyyyyy-yyyy-yyyy-yyyy-yyyyyyyyyyyy\n");
}

builder.doRestore(Container.ENGINE_COMMAND, "exec", CracBuilder.CONTAINER_NAME,
"unshare", "--fork", "--time", "--boottime", "86400", "--monotonic", String.valueOf(Math.max(monotonicOffset, 0)),
CracBuilder.DOCKER_JAVA);
} finally {
builder.ensureContainerKilled();
assertTrue(bootIdFile.toFile().delete());
}
}

@Override
public void exec() throws Exception {
System.out.println("Expected offset: " + monotonicOffset);
// We use uptime to assert that changing the clock worked
long boottimeBefore = readSystemUptime();

long before = System.nanoTime();
Core.checkpointRestore();
long after = System.nanoTime();
System.out.println("Before: " + before);
System.out.println("After: " + after);
assertLTE(before, after, "After < Before");
if (changeBootId || monotonicOffset <= 0) {
// Even though we have shifted the monotic offset by a day the difference
// is adjusted by difference between wall clock time before and after;
// the difference in monotonic time is considered "random"
assertLT(after, before + TimeUnit.HOURS.toNanos(1), "After too late");
} else {
assertGT(after, before + TimeUnit.HOURS.toNanos(1), "After too early");
assertLT(after, before + TimeUnit.HOURS.toNanos(25), "After too late");
}
long boottimeAfter = readSystemUptime();
assertGTE(boottimeAfter, boottimeBefore + 86_400_000, "Boottime was not changed");
RuntimeMXBean runtimeMX = ManagementFactory.getRuntimeMXBean();
assertGTE(runtimeMX.getUptime(), 0L, "VM Uptime is negative!");
CRaCMXBean cracBean = CRaCMXBean.getCRaCMXBean();
assertLT(cracBean.getUptimeSinceRestore(), 60_000L);
assertGTE(cracBean.getUptimeSinceRestore(), 0L);
}

private long readSystemUptime() throws IOException {
String uptimeStr = Files.readString(Path.of("/proc/uptime"));
String[] parts = uptimeStr.split(" ");
return (long)(Double.parseDouble(parts[0]) * 1000);
}
}
Loading

0 comments on commit a8e69de

Please sign in to comment.