Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle cgroups v2 in OsProbe (#77128) #77306

Merged
merged 4 commits into from
Sep 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/changelog/77128.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
pr: 77128
summary: Handle cgroups v2 in `OsProbe`
area: Infra/Core
type: enhancement
issues:
- 77126
- 76812
Original file line number Diff line number Diff line change
Expand Up @@ -838,7 +838,6 @@ public void test131InitProcessHasCorrectPID() {
/**
* Check that Elasticsearch reports per-node cgroup information.
*/
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/76812")
public void test140CgroupOsStatsAreAvailable() throws Exception {
waitForElasticsearch(installation);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -252,12 +252,18 @@ private static void verifyDefaultInstallation(Installation es, Distribution dist
/**
* Starts Elasticsearch, without checking that startup is successful.
*/
public static Shell.Result runElasticsearchStartCommand(Shell sh) throws IOException {
public static Shell.Result runElasticsearchStartCommand(Shell sh) {
if (isSystemd()) {
Packages.JournaldWrapper journald = new Packages.JournaldWrapper(sh);
sh.run("systemctl daemon-reload");
sh.run("systemctl enable elasticsearch.service");
sh.run("systemctl is-enabled elasticsearch.service");
return sh.runIgnoreExitCode("systemctl start elasticsearch.service");
Result exitCode = sh.runIgnoreExitCode("systemctl start elasticsearch.service");
if (exitCode.isSuccess() == false) {
logger.warn(sh.runIgnoreExitCode("systemctl status elasticsearch.service").stdout);
logger.warn(journald.getLogs().stdout);
}
return exitCode;
}
return sh.runIgnoreExitCode("service elasticsearch start");
}
Expand Down
272 changes: 217 additions & 55 deletions server/src/main/java/org/elasticsearch/monitor/os/OsProbe.java

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -144,14 +144,11 @@ grant {
permission java.io.FilePermission "/proc/self/mountinfo", "read";
permission java.io.FilePermission "/proc/diskstats", "read";

// control group stats on Linux
// control group stats on Linux. cgroup v2 stats are in an unpredictable
// location under `/sys/fs/cgroup`, so unfortunately we have to allow
// read access to the entire directory hierarchy.
permission java.io.FilePermission "/proc/self/cgroup", "read";
permission java.io.FilePermission "/sys/fs/cgroup/cpu", "read";
permission java.io.FilePermission "/sys/fs/cgroup/cpu/-", "read";
permission java.io.FilePermission "/sys/fs/cgroup/cpuacct", "read";
permission java.io.FilePermission "/sys/fs/cgroup/cpuacct/-", "read";
permission java.io.FilePermission "/sys/fs/cgroup/memory", "read";
permission java.io.FilePermission "/sys/fs/cgroup/memory/-", "read";
permission java.io.FilePermission "/sys/fs/cgroup/-", "read";

// system memory on Linux systems affected by JDK bug (#66629)
permission java.io.FilePermission "/proc/meminfo", "read";
Expand Down
132 changes: 83 additions & 49 deletions server/src/test/java/org/elasticsearch/monitor/os/OsProbeTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public void testOsInfo() throws IOException {
final OsProbe osProbe = new OsProbe() {

@Override
List<String> readOsRelease() throws IOException {
List<String> readOsRelease() {
assert Constants.LINUX : Constants.OS_NAME;
if (prettyName != null) {
final String quote = randomFrom("\"", "'", "");
Expand Down Expand Up @@ -78,8 +78,10 @@ public void testOsStats() {
OsStats stats = osProbe.osStats();
assertNotNull(stats);
assertThat(stats.getTimestamp(), greaterThan(0L));
assertThat(stats.getCpu().getPercent(), anyOf(equalTo((short) -1),
is(both(greaterThanOrEqualTo((short) 0)).and(lessThanOrEqualTo((short) 100)))));
assertThat(
stats.getCpu().getPercent(),
anyOf(equalTo((short) -1), is(both(greaterThanOrEqualTo((short) 0)).and(lessThanOrEqualTo((short) 100))))
);
double[] loadAverage = stats.getCpu().getLoadAverage();
if (loadAverage != null) {
assertThat(loadAverage.length, equalTo(3));
Expand Down Expand Up @@ -141,8 +143,11 @@ public void testOsStats() {
assertThat(stats.getCgroup().getCpuStat().getTimeThrottledNanos(), greaterThanOrEqualTo(0L));
// These could be null if transported from a node running an older version, but shouldn't be null on the current node
assertThat(stats.getCgroup().getMemoryControlGroup(), notNullValue());
assertThat(stats.getCgroup().getMemoryLimitInBytes(), notNullValue());
assertThat(new BigInteger(stats.getCgroup().getMemoryLimitInBytes()), greaterThan(BigInteger.ZERO));
String memoryLimitInBytes = stats.getCgroup().getMemoryLimitInBytes();
assertThat(memoryLimitInBytes, notNullValue());
if (memoryLimitInBytes.equals("max") == false) {
assertThat(new BigInteger(memoryLimitInBytes), greaterThan(BigInteger.ZERO));
}
assertThat(stats.getCgroup().getMemoryUsageInBytes(), notNullValue());
assertThat(new BigInteger(stats.getCgroup().getMemoryUsageInBytes()), greaterThan(BigInteger.ZERO));
}
Expand Down Expand Up @@ -173,16 +178,14 @@ String readProcLoadavg() {
}

public void testCgroupProbe() {
assumeTrue("test runs on Linux only", Constants.LINUX);

final boolean areCgroupStatsAvailable = randomBoolean();
final int availableCgroupsVersion = randomFrom(0, 1, 2);
final String hierarchy = randomAlphaOfLength(16);

final OsProbe probe = buildStubOsProbe(areCgroupStatsAvailable, hierarchy);
final OsProbe probe = buildStubOsProbe(availableCgroupsVersion, hierarchy);

final OsStats.Cgroup cgroup = probe.osStats().getCgroup();

if (areCgroupStatsAvailable) {
if (availableCgroupsVersion > 0) {
assertNotNull(cgroup);
assertThat(cgroup.getCpuAcctControlGroup(), equalTo("/" + hierarchy));
assertThat(cgroup.getCpuAcctUsageNanos(), equalTo(364869866063112L));
Expand All @@ -200,61 +203,53 @@ public void testCgroupProbe() {
}

public void testCgroupProbeWithMissingCpuAcct() {
assumeTrue("test runs on Linux only", Constants.LINUX);

final String hierarchy = randomAlphaOfLength(16);

// This cgroup data is missing a line about cpuacct
List<String> procSelfCgroupLines = getProcSelfGroupLines(hierarchy)
.stream()
List<String> procSelfCgroupLines = getProcSelfGroupLines(1, hierarchy).stream()
.map(line -> line.replaceFirst(",cpuacct", ""))
.collect(Collectors.toList());

final OsProbe probe = buildStubOsProbe(true, hierarchy, procSelfCgroupLines);
final OsProbe probe = buildStubOsProbe(1, hierarchy, procSelfCgroupLines);

final OsStats.Cgroup cgroup = probe.osStats().getCgroup();

assertNull(cgroup);
}

public void testCgroupProbeWithMissingCpu() {
assumeTrue("test runs on Linux only", Constants.LINUX);

final String hierarchy = randomAlphaOfLength(16);

// This cgroup data is missing a line about cpu
List<String> procSelfCgroupLines = getProcSelfGroupLines(hierarchy)
.stream()
List<String> procSelfCgroupLines = getProcSelfGroupLines(1, hierarchy).stream()
.map(line -> line.replaceFirst(":cpu,", ":"))
.collect(Collectors.toList());


final OsProbe probe = buildStubOsProbe(true, hierarchy, procSelfCgroupLines);
final OsProbe probe = buildStubOsProbe(1, hierarchy, procSelfCgroupLines);

final OsStats.Cgroup cgroup = probe.osStats().getCgroup();

assertNull(cgroup);
}

public void testCgroupProbeWithMissingMemory() {
assumeTrue("test runs on Linux only", Constants.LINUX);

final String hierarchy = randomAlphaOfLength(16);

// This cgroup data is missing a line about memory
List<String> procSelfCgroupLines = getProcSelfGroupLines(hierarchy)
.stream()
List<String> procSelfCgroupLines = getProcSelfGroupLines(1, hierarchy).stream()
.filter(line -> line.contains(":memory:") == false)
.collect(Collectors.toList());

final OsProbe probe = buildStubOsProbe(true, hierarchy, procSelfCgroupLines);
final OsProbe probe = buildStubOsProbe(1, hierarchy, procSelfCgroupLines);

final OsStats.Cgroup cgroup = probe.osStats().getCgroup();

assertNull(cgroup);
}

public void testGetTotalMemFromProcMeminfo() throws Exception {
int cgroupsVersion = randomFrom(1, 2);

// missing MemTotal line
List<String> meminfoLines = Arrays.asList(
"MemFree: 8467692 kB",
Expand All @@ -265,7 +260,7 @@ public void testGetTotalMemFromProcMeminfo() throws Exception {
"Active: 43637908 kB",
"Inactive: 8130280 kB"
);
OsProbe probe = buildStubOsProbe(true, "", org.elasticsearch.core.List.of(), meminfoLines);
OsProbe probe = buildStubOsProbe(cgroupsVersion, "", org.elasticsearch.core.List.of(), meminfoLines);
assertThat(probe.getTotalMemFromProcMeminfo(), equalTo(0L));

// MemTotal line with invalid value
Expand All @@ -279,7 +274,7 @@ public void testGetTotalMemFromProcMeminfo() throws Exception {
"Active: 43637908 kB",
"Inactive: 8130280 kB"
);
probe = buildStubOsProbe(true, "", org.elasticsearch.core.List.of(), meminfoLines);
probe = buildStubOsProbe(cgroupsVersion, "", org.elasticsearch.core.List.of(), meminfoLines);
assertThat(probe.getTotalMemFromProcMeminfo(), equalTo(0L));

// MemTotal line with invalid unit
Expand All @@ -293,7 +288,7 @@ public void testGetTotalMemFromProcMeminfo() throws Exception {
"Active: 43637908 kB",
"Inactive: 8130280 kB"
);
probe = buildStubOsProbe(true, "", org.elasticsearch.core.List.of(), meminfoLines);
probe = buildStubOsProbe(cgroupsVersion, "", org.elasticsearch.core.List.of(), meminfoLines);
assertThat(probe.getTotalMemFromProcMeminfo(), equalTo(0L));

// MemTotal line with random valid value
Expand All @@ -308,7 +303,7 @@ public void testGetTotalMemFromProcMeminfo() throws Exception {
"Active: 43637908 kB",
"Inactive: 8130280 kB"
);
probe = buildStubOsProbe(true, "", org.elasticsearch.core.List.of(), meminfoLines);
probe = buildStubOsProbe(cgroupsVersion, "", org.elasticsearch.core.List.of(), meminfoLines);
assertThat(probe.getTotalMemFromProcMeminfo(), equalTo(memTotalInKb * 1024L));
}

Expand All @@ -319,7 +314,13 @@ public void testGetTotalMemoryOnDebian8() throws Exception {
assertThat(osProbe.getTotalPhysicalMemorySize(), greaterThan(0L));
}

private static List<String> getProcSelfGroupLines(String hierarchy) {
private static List<String> getProcSelfGroupLines(int cgroupsVersion, String hierarchy) {
// It doesn't really matter if cgroupsVersion == 0 here

if (cgroupsVersion == 2) {
return org.elasticsearch.core.List.of("0::/" + hierarchy);
}

return Arrays.asList(
"10:freezer:/",
"9:net_cls,net_prio:/",
Expand All @@ -331,32 +332,40 @@ private static List<String> getProcSelfGroupLines(String hierarchy) {
"3:perf_event:/",
"2:cpu,cpuacct,cpuset:/" + hierarchy,
"1:name=systemd:/user.slice/user-1000.slice/session-2359.scope",
"0::/cgroup2");
"0::/cgroup2"
);
}

private static OsProbe buildStubOsProbe(final boolean areCgroupStatsAvailable, final String hierarchy) {
List<String> procSelfCgroupLines = getProcSelfGroupLines(hierarchy);
private static OsProbe buildStubOsProbe(final int availableCgroupsVersion, final String hierarchy) {
List<String> procSelfCgroupLines = getProcSelfGroupLines(availableCgroupsVersion, hierarchy);

return buildStubOsProbe(areCgroupStatsAvailable, hierarchy, procSelfCgroupLines);
return buildStubOsProbe(availableCgroupsVersion, hierarchy, procSelfCgroupLines);
}

/**
* Builds a test instance of OsProbe. Methods that ordinarily read from the filesystem are overridden to return values based upon
* the arguments to this method.
*
* @param areCgroupStatsAvailable whether or not cgroup data is available. Normally OsProbe establishes this for itself.
* @param availableCgroupsVersion which version of cgroups is available: 1 or 2, or 0 for no cgroups. Normally OsProbe establishes
* this for itself.
* @param hierarchy a mock value used to generate a cgroup hierarchy.
* @param procSelfCgroupLines the lines that will be used as the content of <code>/proc/self/cgroup</code>
* @param procMeminfoLines lines that will be used as the content of <code>/proc/meminfo</code>
* @return a test instance
*/
private static OsProbe buildStubOsProbe(
final boolean areCgroupStatsAvailable,
final int availableCgroupsVersion,
final String hierarchy,
List<String> procSelfCgroupLines,
List<String> procMeminfoLines
) {
return new OsProbe() {
// Test override: ignores the isLinux argument and always delegates to the
// superclass with `true`, so the stubbed cgroup readers below are exercised
// regardless of the value passed in.
@Override
OsStats.Cgroup getCgroup(boolean isLinux) {
// Pretend we're always on Linux so that we can run the cgroup tests
return super.getCgroup(true);
}

@Override
List<String> readProcSelfCgroup() {
return procSelfCgroupLines;
Expand All @@ -382,10 +391,7 @@ String readSysFsCgroupCpuAcctCpuAcctCfsQuota(String controlGroup) {

@Override
List<String> readSysFsCgroupCpuAcctCpuStat(String controlGroup) {
return Arrays.asList(
"nr_periods 17992",
"nr_throttled 1311",
"throttled_time 139298645489");
return Arrays.asList("nr_periods 17992", "nr_throttled 1311", "throttled_time 139298645489");
}

@Override
Expand All @@ -403,22 +409,50 @@ String readSysFsCgroupMemoryUsageInBytes(String controlGroup) {

@Override
boolean areCgroupStatsAvailable() {
return areCgroupStatsAvailable;
return availableCgroupsVersion > 0;
}

@Override
List<String> readProcMeminfo() throws IOException {
List<String> readProcMeminfo() {
return procMeminfoLines;
}

// Test stub for the cgroup v2 memory limit: checks that the probe asks for
// the expected control group ("/" + hierarchy) and returns a fixed value
// instead of reading anything from disk.
@Override
String readSysFsCgroupV2MemoryLimitInBytes(String controlGroup) {
assertThat(controlGroup, equalTo("/" + hierarchy));
// This is the highest value that can be stored in an unsigned 64-bit number, hence too big for long
return "18446744073709551615";
}

// Test stub for the cgroup v2 memory usage: checks that the probe asks for
// the expected control group ("/" + hierarchy) and returns a fixed value.
@Override
String readSysFsCgroupV2MemoryUsageInBytes(String controlGroup) {
assertThat(controlGroup, equalTo("/" + hierarchy));
return "4796416";
}

// Test stub for the cgroup v2 CPU statistics: checks that the probe asks for
// the expected control group ("/" + hierarchy) and returns a fixed list of
// "<name> <value>" lines.
// NOTE(review): the `_usec` suffixes suggest microsecond units — confirm
// against how OsProbe interprets these values.
@Override
List<String> readCgroupV2CpuStats(String controlGroup) {
assertThat(controlGroup, equalTo("/" + hierarchy));
return org.elasticsearch.core.List.of(
"usage_usec 364869866063112",
"user_usec 34636",
"system_usec 9896",
"nr_periods 17992",
"nr_throttled 1311",
"throttled_usec 139298645489"
);
}

// Test stub for the cgroup v2 CPU limit: checks that the probe asks for the
// expected control group ("/" + hierarchy) and returns a fixed two-field string.
// NOTE(review): presumably "<quota> <period>" as in the cgroup v2 `cpu.max`
// file — confirm against OsProbe's parsing.
@Override
String readCgroupV2CpuLimit(String controlGroup) {
assertThat(controlGroup, equalTo("/" + hierarchy));
return "50000 100000";
}
};
}

private static OsProbe buildStubOsProbe(
final boolean areCgroupStatsAvailable,
final String hierarchy,
List<String> procSelfCgroupLines
) {
return buildStubOsProbe(areCgroupStatsAvailable, hierarchy, procSelfCgroupLines, org.elasticsearch.core.List.of());
private static OsProbe buildStubOsProbe(final int availableCgroupsVersion, final String hierarchy, List<String> procSelfCgroupLines) {
return buildStubOsProbe(availableCgroupsVersion, hierarchy, procSelfCgroupLines, org.elasticsearch.core.List.of());
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -1212,7 +1212,7 @@ static long machineMemoryFromStats(OsStats stats) {
OsStats.Cgroup cgroup = stats.getCgroup();
if (cgroup != null) {
String containerLimitStr = cgroup.getMemoryLimitInBytes();
if (containerLimitStr != null) {
if (containerLimitStr != null && containerLimitStr.equals("max") == false) {
BigInteger containerLimit = new BigInteger(containerLimitStr);
if ((containerLimit.compareTo(BigInteger.valueOf(mem)) < 0 && containerLimit.compareTo(BigInteger.ZERO) > 0)
// mem <= 0 means the value couldn't be obtained for some reason
Expand Down