Skip to content

Commit a5327f0

Browse files
committed
FIX: Missing mesos-agent metrics when using cgroupsv2.
1 parent 39d2608 commit a5327f0

File tree

3 files changed

+106
-100
lines changed

3 files changed

+106
-100
lines changed

CHANGELOG

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
1-
Release Notes - Clusterd
1+
Release Notes - Clusterd - Version 1.11.0-0.6.1
22
-----------------------------------------------
33

4+
* FIX: Missing mesos-agent metrics when using cgroupsv2.
5+
46

57
Release Notes - Clusterd - Version 1.11.0-0.6.0
68
-----------------------------------------------
79

810
* Update zookeeper client to version 3.9.2.
9-
11+
1012

1113
Release Notes - Clusterd - Version 1.11.0-0.5.1
1214
-----------------------------------------------

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ project(aventer-mesos)
2626
set(MESOS_MAJOR_VERSION 1)
2727
set(MESOS_MINOR_VERSION 11)
2828
set(MESOS_PATCH_VERSION 0)
29-
set(MESOS_SUB_VERSION 0.6.0)
29+
set(MESOS_SUB_VERSION 0.6.1)
3030
set(PACKAGE_VERSION
3131
${MESOS_MAJOR_VERSION}.${MESOS_MINOR_VERSION}.${MESOS_PATCH_VERSION})
3232

src/slave/containerizer/docker.cpp

Lines changed: 101 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060

6161
#ifdef __linux__
6262
#include "linux/cgroups.hpp"
63+
#include "linux/cgroups2.hpp"
6364
#include "linux/fs.hpp"
6465
#include "linux/systemd.hpp"
6566
#endif // __linux__
@@ -2227,124 +2228,127 @@ Try<ResourceStatistics> DockerContainerizerProcess::cgroupsStatistics(
22272228
#ifndef __linux__
22282229
return Error("Does not support cgroups on non-linux platform");
22292230
#else
2230-
static const Result<string> cpuacctHierarchy = cgroups::hierarchy("cpuacct");
2231-
static const Result<string> memHierarchy = cgroups::hierarchy("memory");
2232-
2233-
// NOTE: Normally, a Docker container should be in its own cgroup.
2234-
// However, a zombie process (exited but not reaped) will be
2235-
// temporarily moved into the system root cgroup. We add some
2236-
// defensive check here to make sure we are not reporting statistics
2237-
// for the root cgroup. See MESOS-8480 for details.
2238-
const string systemRootCgroup = stringify(os::PATH_SEPARATOR);
2239-
2240-
if (cpuacctHierarchy.isError()) {
2241-
return Error(
2242-
"Failed to determine the cgroup 'cpuacct' subsystem hierarchy: " +
2243-
cpuacctHierarchy.error());
2244-
}
2245-
2246-
if (memHierarchy.isError()) {
2247-
return Error(
2248-
"Failed to determine the cgroup 'memory' subsystem hierarchy: " +
2249-
memHierarchy.error());
2250-
}
2251-
2252-
const Result<string> cpuacctCgroup = cgroups::cpuacct::cgroup(pid);
2253-
if (cpuacctCgroup.isError()) {
2254-
return Error(
2255-
"Failed to determine cgroup for the 'cpuacct' subsystem: " +
2256-
cpuacctCgroup.error());
2257-
} else if (cpuacctCgroup.isNone()) {
2258-
return Error("Unable to find 'cpuacct' cgroup subsystem");
2259-
} else if (cpuacctCgroup.get() == systemRootCgroup) {
2260-
return Error(
2261-
"Process '" + stringify(pid) +
2262-
"' should not be in the system root cgroup (being destroyed?)");
2263-
}
2264-
2265-
const Result<string> memCgroup = cgroups::memory::cgroup(pid);
2266-
if (memCgroup.isError()) {
2267-
return Error(
2268-
"Failed to determine cgroup for the 'memory' subsystem: " +
2269-
memCgroup.error());
2270-
} else if (memCgroup.isNone()) {
2271-
return Error("Unable to find 'memory' cgroup subsystem");
2272-
} else if (memCgroup.get() == systemRootCgroup) {
2273-
return Error(
2274-
"Process '" + stringify(pid) +
2275-
"' should not be in the system root cgroup (being destroyed?)");
2276-
}
2277-
2278-
const Try<cgroups::cpuacct::Stats> cpuAcctStat =
2279-
cgroups::cpuacct::stat(cpuacctHierarchy.get(), cpuacctCgroup.get());
2280-
2281-
if (cpuAcctStat.isError()) {
2282-
return Error("Failed to get cpu.stat: " + cpuAcctStat.error());
2283-
}
2284-
2285-
const Try<hashmap<string, uint64_t>> memStats =
2286-
cgroups::stat(memHierarchy.get(), memCgroup.get(), "memory.stat");
2231+
ResourceStatistics result;
22872232

2288-
if (memStats.isError()) {
2289-
return Error(
2290-
"Error getting memory statistics from cgroups memory subsystem: " +
2291-
memStats.error());
2292-
}
2233+
if (!cgroups2::enabled()) {
2234+
static const Result<string> cpuacctHierarchy = cgroups::hierarchy("cpuacct");
2235+
static const Result<string> memHierarchy = cgroups::hierarchy("memory");
22932236

2294-
if (!memStats->contains("rss")) {
2295-
return Error("cgroups memory stats does not contain 'rss' data");
2296-
}
2237+
// NOTE: Normally, a Docker container should be in its own cgroup.
2238+
// However, a zombie process (exited but not reaped) will be
2239+
// temporarily moved into the system root cgroup. We add some
2240+
// defensive check here to make sure we are not reporting statistics
2241+
// for the root cgroup. See MESOS-8480 for details.
2242+
const string systemRootCgroup = stringify(os::PATH_SEPARATOR);
22972243

2298-
ResourceStatistics result;
2299-
result.set_timestamp(Clock::now().secs());
2300-
result.set_cpus_system_time_secs(cpuAcctStat->system.secs());
2301-
result.set_cpus_user_time_secs(cpuAcctStat->user.secs());
2302-
result.set_mem_rss_bytes(memStats->at("rss"));
2244+
if (cpuacctHierarchy.isError()) {
2245+
return Error(
2246+
"Failed to determine the cgroup 'cpuacct' subsystem hierarchy: " +
2247+
cpuacctHierarchy.error());
2248+
}
23032249

2304-
// Add the cpu.stat information only if CFS is enabled.
2305-
if (flags.cgroups_enable_cfs) {
2306-
static const Result<string> cpuHierarchy = cgroups::hierarchy("cpu");
2250+
if (memHierarchy.isError()) {
2251+
return Error(
2252+
"Failed to determine the cgroup 'memory' subsystem hierarchy: " +
2253+
memHierarchy.error());
2254+
}
23072255

2308-
if (cpuHierarchy.isError()) {
2256+
const Result<string> cpuacctCgroup = cgroups::cpuacct::cgroup(pid);
2257+
if (cpuacctCgroup.isError()) {
2258+
return Error(
2259+
"Failed to determine cgroup for the 'cpuacct' subsystem: " +
2260+
cpuacctCgroup.error());
2261+
} else if (cpuacctCgroup.isNone()) {
2262+
return Error("Unable to find 'cpuacct' cgroup subsystem");
2263+
} else if (cpuacctCgroup.get() == systemRootCgroup) {
23092264
return Error(
2310-
"Failed to determine the cgroup 'cpu' subsystem hierarchy: " +
2311-
cpuHierarchy.error());
2265+
"Process '" + stringify(pid) +
2266+
"' should not be in the system root cgroup (being destroyed?)");
23122267
}
23132268

2314-
const Result<string> cpuCgroup = cgroups::cpu::cgroup(pid);
2315-
if (cpuCgroup.isError()) {
2269+
const Result<string> memCgroup = cgroups::memory::cgroup(pid);
2270+
if (memCgroup.isError()) {
23162271
return Error(
2317-
"Failed to determine cgroup for the 'cpu' subsystem: " +
2318-
cpuCgroup.error());
2319-
} else if (cpuCgroup.isNone()) {
2320-
return Error("Unable to find 'cpu' cgroup subsystem");
2321-
} else if (cpuCgroup.get() == systemRootCgroup) {
2272+
"Failed to determine cgroup for the 'memory' subsystem: " +
2273+
memCgroup.error());
2274+
} else if (memCgroup.isNone()) {
2275+
return Error("Unable to find 'memory' cgroup subsystem");
2276+
} else if (memCgroup.get() == systemRootCgroup) {
23222277
return Error(
23232278
"Process '" + stringify(pid) +
23242279
"' should not be in the system root cgroup (being destroyed?)");
23252280
}
23262281

2327-
const Try<hashmap<string, uint64_t>> stat =
2328-
cgroups::stat(cpuHierarchy.get(), cpuCgroup.get(), "cpu.stat");
2282+
const Try<cgroups::cpuacct::Stats> cpuAcctStat =
2283+
cgroups::cpuacct::stat(cpuacctHierarchy.get(), cpuacctCgroup.get());
23292284

2330-
if (stat.isError()) {
2331-
return Error("Failed to read cpu.stat: " + stat.error());
2285+
if (cpuAcctStat.isError()) {
2286+
return Error("Failed to get cpu.stat: " + cpuAcctStat.error());
23322287
}
23332288

2334-
Option<uint64_t> nr_periods = stat->get("nr_periods");
2335-
if (nr_periods.isSome()) {
2336-
result.set_cpus_nr_periods(nr_periods.get());
2289+
const Try<hashmap<string, uint64_t>> memStats =
2290+
cgroups::stat(memHierarchy.get(), memCgroup.get(), "memory.stat");
2291+
2292+
if (memStats.isError()) {
2293+
return Error(
2294+
"Error getting memory statistics from cgroups memory subsystem: " +
2295+
memStats.error());
23372296
}
23382297

2339-
Option<uint64_t> nr_throttled = stat->get("nr_throttled");
2340-
if (nr_throttled.isSome()) {
2341-
result.set_cpus_nr_throttled(nr_throttled.get());
2298+
if (!memStats->contains("rss")) {
2299+
return Error("cgroups memory stats does not contain 'rss' data");
23422300
}
23432301

2344-
Option<uint64_t> throttled_time = stat->get("throttled_time");
2345-
if (throttled_time.isSome()) {
2346-
result.set_cpus_throttled_time_secs(
2347-
Nanoseconds(throttled_time.get()).secs());
2302+
result.set_timestamp(Clock::now().secs());
2303+
result.set_cpus_system_time_secs(cpuAcctStat->system.secs());
2304+
result.set_cpus_user_time_secs(cpuAcctStat->user.secs());
2305+
result.set_mem_rss_bytes(memStats->at("rss"));
2306+
2307+
// Add the cpu.stat information only if CFS is enabled.
2308+
if (flags.cgroups_enable_cfs) {
2309+
static const Result<string> cpuHierarchy = cgroups::hierarchy("cpu");
2310+
2311+
if (cpuHierarchy.isError()) {
2312+
return Error(
2313+
"Failed to determine the cgroup 'cpu' subsystem hierarchy: " +
2314+
cpuHierarchy.error());
2315+
}
2316+
2317+
const Result<string> cpuCgroup = cgroups::cpu::cgroup(pid);
2318+
if (cpuCgroup.isError()) {
2319+
return Error(
2320+
"Failed to determine cgroup for the 'cpu' subsystem: " +
2321+
cpuCgroup.error());
2322+
} else if (cpuCgroup.isNone()) {
2323+
return Error("Unable to find 'cpu' cgroup subsystem");
2324+
} else if (cpuCgroup.get() == systemRootCgroup) {
2325+
return Error(
2326+
"Process '" + stringify(pid) +
2327+
"' should not be in the system root cgroup (being destroyed?)");
2328+
}
2329+
2330+
const Try<hashmap<string, uint64_t>> stat =
2331+
cgroups::stat(cpuHierarchy.get(), cpuCgroup.get(), "cpu.stat");
2332+
2333+
if (stat.isError()) {
2334+
return Error("Failed to read cpu.stat: " + stat.error());
2335+
}
2336+
2337+
Option<uint64_t> nr_periods = stat->get("nr_periods");
2338+
if (nr_periods.isSome()) {
2339+
result.set_cpus_nr_periods(nr_periods.get());
2340+
}
2341+
2342+
Option<uint64_t> nr_throttled = stat->get("nr_throttled");
2343+
if (nr_throttled.isSome()) {
2344+
result.set_cpus_nr_throttled(nr_throttled.get());
2345+
}
2346+
2347+
Option<uint64_t> throttled_time = stat->get("throttled_time");
2348+
if (throttled_time.isSome()) {
2349+
result.set_cpus_throttled_time_secs(
2350+
Nanoseconds(throttled_time.get()).secs());
2351+
}
23482352
}
23492353
}
23502354

0 commit comments

Comments
 (0)