@@ -2079,12 +2079,19 @@ Future<ResourceStatistics> DockerContainerizerProcess::usage(
2079
2079
ResourceStatistics result;
2080
2080
2081
2081
#ifdef __linux__
2082
- const Try<ResourceStatistics> cgroupStats = cgroupsStatistics (pid);
2083
- if (cgroupStats.isError ()) {
2084
- return Failure (" Failed to collect cgroup stats: " + cgroupStats.error ());
2082
+ if (!cgroups2::enabled ()) {
2083
+ const Try<ResourceStatistics> cgroupStats = cgroupsStatistics (pid);
2084
+ if (cgroupStats.isError ()) {
2085
+ return Failure (" Failed to collect cgroup stats: " + cgroupStats.error ());
2086
+ }
2087
+ result = cgroupStats.get ();
2088
+ } else {
2089
+ const Try<ResourceStatistics> cgroupStats = cgroupsv2Statistics (containerId);
2090
+ if (cgroupStats.isError ()) {
2091
+ return Failure (" Failed to collect cgroupv2 stats: " + cgroupStats.error ());
2092
+ }
2093
+ result = cgroupStats.get ();
2085
2094
}
2086
-
2087
- result = cgroupStats.get ();
2088
2095
#endif // __linux__
2089
2096
2090
2097
Option<double > cpuRequest, cpuLimit, memLimit;
@@ -2221,134 +2228,229 @@ Future<ResourceStatistics> DockerContainerizerProcess::usage(
2221
2228
}));
2222
2229
}
2223
2230
2231
+ Try<std::string> DockerContainerizerProcess::getCgroupV2Path (pid_t pid) const {
2232
+ std::string path = " /proc/" + std::to_string (pid) + " /cgroup" ;
2233
+ std::ifstream file (path);
2234
+ if (!file.is_open ()) {
2235
+ return Error (" Error open cgroup file: " + path);
2236
+ }
2237
+
2238
+ std::string line;
2239
+ while (std::getline (file, line)) {
2240
+ if (line.rfind (" 0::" , 0 ) == 0 ) {
2241
+ size_t pos = line.find (" ::" );
2242
+ if (pos != std::string::npos && pos + 2 < line.size ()) {
2243
+ return line.substr (pos + 2 );
2244
+ }
2245
+ }
2246
+ }
2247
+
2248
+ return Error (" Could not find cgroup for PID " + std::to_string (pid));
2249
+ }
2250
+
2251
+ Try<ResourceStatistics> DockerContainerizerProcess::cgroupsv2Statistics (ContainerID containerId) const
2252
+ {
2253
+ #ifndef __linux__
2254
+ return Error (" Does not support cgroups on non-linux platform" );
2255
+ #else
2256
+
2257
+ if (!containers_.contains (containerId)) {
2258
+ return Error (" Unknown container " + stringify (containerId));
2259
+ }
2260
+
2261
+ Container* container = containers_.at (containerId);
2262
+
2263
+ Try<std::string> cgPath = getCgroupV2Path (container->pid .get ());
2264
+ if (cgPath.isError ()) {
2265
+ return Error (cgPath.error ());
2266
+ }
2267
+
2268
+ std::stringstream sc;
2269
+ sc << flags.cgroups_hierarchy << cgPath.get ();
2270
+ const string cgroup = sc.str ();
2271
+
2272
+ Try<cgroups2::cpu::Stats> cpuStats = cgroups2::cpu::stats (cgroup);
2273
+ if (cpuStats.isError ()) {
2274
+ return Error (" Failed to get cgroup CPU stats: " + cpuStats.error ());
2275
+ }
2276
+
2277
+ ResourceStatistics usage;
2278
+ usage.set_cpus_user_time_secs (cpuStats->user_time .secs ());
2279
+ usage.set_cpus_system_time_secs (cpuStats->system_time .secs ());
2280
+
2281
+ if (cpuStats->periods .isSome ()) {
2282
+ usage.set_cpus_nr_periods (*cpuStats->periods );
2283
+ }
2284
+ if (cpuStats->throttled .isSome ()) {
2285
+ usage.set_cpus_nr_throttled (*cpuStats->throttled );
2286
+ }
2287
+ if (cpuStats->throttle_time .isSome ()) {
2288
+ usage.set_cpus_throttled_time_secs (cpuStats->throttle_time ->secs ());
2289
+ }
2290
+
2291
+ if (cpuStats->periods .isNone ()
2292
+ || cpuStats->throttled .isNone ()
2293
+ || cpuStats->throttle_time .isNone ()) {
2294
+ LOG (ERROR) << " cpu throttling stats missing for cgroup '" << cgroup << " '"
2295
+ " despite the 'cpu' controller being enabled" ;
2296
+ }
2297
+
2298
+ Try<cgroups2::memory::Stats> memoryStats = cgroups2::memory::stats (cgroup);
2299
+ if (memoryStats.isError ()) {
2300
+ return Error (" Failed to get cgroup memory stats: " + memoryStats.error ());
2301
+ }
2302
+
2303
+ // Kernel memory usage.
2304
+ usage.set_mem_kmem_usage_bytes (memoryStats->kernel .bytes ());
2305
+
2306
+ // Kernel TCP buffers usage.
2307
+ usage.set_mem_kmem_tcp_usage_bytes (memoryStats->sock .bytes ());
2308
+
2309
+ // Page cache usage.
2310
+ usage.set_mem_file_bytes (memoryStats->file .bytes ());
2311
+ usage.set_mem_cache_bytes (memoryStats->file .bytes ());
2312
+
2313
+ // Anonymous memory usage.
2314
+ usage.set_mem_anon_bytes (memoryStats->anon .bytes ());
2315
+ usage.set_mem_rss_bytes (memoryStats->anon .bytes ());
2316
+
2317
+ // File mapped memory usage.
2318
+ usage.set_mem_mapped_file_bytes (memoryStats->file_mapped .bytes ());
2319
+
2320
+ // Total unevictable memory.
2321
+ usage.set_mem_unevictable_bytes (memoryStats->unevictable .bytes ());
2322
+
2323
+ return usage;
2324
+ #endif // __linux__
2325
+ }
2326
+
2224
2327
2225
2328
Try<ResourceStatistics> DockerContainerizerProcess::cgroupsStatistics (
2226
2329
pid_t pid) const
2227
2330
{
2228
2331
#ifndef __linux__
2229
2332
return Error (" Does not support cgroups on non-linux platform" );
2230
2333
#else
2231
- ResourceStatistics result;
2232
2334
2233
- if (!cgroups2::enabled ()) {
2234
- static const Result<string> cpuacctHierarchy = cgroups::hierarchy (" cpuacct" );
2235
- static const Result<string> memHierarchy = cgroups::hierarchy (" memory" );
2236
2335
2237
- // NOTE: Normally, a Docker container should be in its own cgroup.
2238
- // However, a zombie process (exited but not reaped) will be
2239
- // temporarily moved into the system root cgroup. We add some
2240
- // defensive check here to make sure we are not reporting statistics
2241
- // for the root cgroup. See MESOS-8480 for details.
2242
- const string systemRootCgroup = stringify (os::PATH_SEPARATOR);
2336
+ static const Result<string> cpuacctHierarchy = cgroups::hierarchy (" cpuacct" );
2337
+ static const Result<string> memHierarchy = cgroups::hierarchy (" memory" );
2243
2338
2244
- if (cpuacctHierarchy.isError ()) {
2245
- return Error (
2246
- " Failed to determine the cgroup 'cpuacct' subsystem hierarchy: " +
2247
- cpuacctHierarchy.error ());
2248
- }
2339
+ // NOTE: Normally, a Docker container should be in its own cgroup.
2340
+ // However, a zombie process (exited but not reaped) will be
2341
+ // temporarily moved into the system root cgroup. We add some
2342
+ // defensive check here to make sure we are not reporting statistics
2343
+ // for the root cgroup. See MESOS-8480 for details.
2344
+ const string systemRootCgroup = stringify (os::PATH_SEPARATOR);
2249
2345
2250
- if (memHierarchy .isError ()) {
2251
- return Error (
2252
- " Failed to determine the cgroup 'memory ' subsystem hierarchy: " +
2253
- memHierarchy .error ());
2254
- }
2346
+ if (cpuacctHierarchy .isError ()) {
2347
+ return Error (
2348
+ " Failed to determine the cgroup 'cpuacct ' subsystem hierarchy: " +
2349
+ cpuacctHierarchy .error ());
2350
+ }
2255
2351
2256
- const Result<string> cpuacctCgroup = cgroups::cpuacct::cgroup (pid);
2257
- if (cpuacctCgroup.isError ()) {
2258
- return Error (
2259
- " Failed to determine cgroup for the 'cpuacct' subsystem: " +
2260
- cpuacctCgroup.error ());
2261
- } else if (cpuacctCgroup.isNone ()) {
2262
- return Error (" Unable to find 'cpuacct' cgroup subsystem" );
2263
- } else if (cpuacctCgroup.get () == systemRootCgroup) {
2352
+ if (memHierarchy.isError ()) {
2353
+ return Error (
2354
+ " Failed to determine the cgroup 'memory' subsystem hierarchy: " +
2355
+ memHierarchy.error ());
2356
+ }
2357
+
2358
+ const Result<string> cpuacctCgroup = cgroups::cpuacct::cgroup (pid);
2359
+ if (cpuacctCgroup.isError ()) {
2360
+ return Error (
2361
+ " Failed to determine cgroup for the 'cpuacct' subsystem: " +
2362
+ cpuacctCgroup.error ());
2363
+ } else if (cpuacctCgroup.isNone ()) {
2364
+ return Error (" Unable to find 'cpuacct' cgroup subsystem" );
2365
+ } else if (cpuacctCgroup.get () == systemRootCgroup) {
2366
+ return Error (
2367
+ " Process '" + stringify (pid) +
2368
+ " ' should not be in the system root cgroup (being destroyed?)" );
2369
+ }
2370
+
2371
+ const Result<string> memCgroup = cgroups::memory::cgroup (pid);
2372
+ if (memCgroup.isError ()) {
2373
+ return Error (
2374
+ " Failed to determine cgroup for the 'memory' subsystem: " +
2375
+ memCgroup.error ());
2376
+ } else if (memCgroup.isNone ()) {
2377
+ return Error (" Unable to find 'memory' cgroup subsystem" );
2378
+ } else if (memCgroup.get () == systemRootCgroup) {
2379
+ return Error (
2380
+ " Process '" + stringify (pid) +
2381
+ " ' should not be in the system root cgroup (being destroyed?)" );
2382
+ }
2383
+
2384
+ const Try<cgroups::cpuacct::Stats> cpuAcctStat =
2385
+ cgroups::cpuacct::stat (cpuacctHierarchy.get (), cpuacctCgroup.get ());
2386
+
2387
+ if (cpuAcctStat.isError ()) {
2388
+ return Error (" Failed to get cpu.stat: " + cpuAcctStat.error ());
2389
+ }
2390
+
2391
+ const Try<hashmap<string, uint64_t >> memStats =
2392
+ cgroups::stat (memHierarchy.get (), memCgroup.get (), " memory.stat" );
2393
+
2394
+ if (memStats.isError ()) {
2395
+ return Error (
2396
+ " Error getting memory statistics from cgroups memory subsystem: " +
2397
+ memStats.error ());
2398
+ }
2399
+
2400
+ if (!memStats->contains (" rss" )) {
2401
+ return Error (" cgroups memory stats does not contain 'rss' data" );
2402
+ }
2403
+
2404
+ ResourceStatistics result;
2405
+ result.set_timestamp (Clock::now ().secs ());
2406
+ result.set_cpus_system_time_secs (cpuAcctStat->system .secs ());
2407
+ result.set_cpus_user_time_secs (cpuAcctStat->user .secs ());
2408
+ result.set_mem_rss_bytes (memStats->at (" rss" ));
2409
+
2410
+ // Add the cpu.stat information only if CFS is enabled.
2411
+ if (flags.cgroups_enable_cfs ) {
2412
+ static const Result<string> cpuHierarchy = cgroups::hierarchy (" cpu" );
2413
+
2414
+ if (cpuHierarchy.isError ()) {
2264
2415
return Error (
2265
- " Process ' " + stringify (pid) +
2266
- " ' should not be in the system root cgroup (being destroyed?) " );
2416
+ " Failed to determine the cgroup 'cpu' subsystem hierarchy: " +
2417
+ cpuHierarchy. error () );
2267
2418
}
2268
2419
2269
- const Result<string> memCgroup = cgroups::memory ::cgroup (pid);
2270
- if (memCgroup .isError ()) {
2420
+ const Result<string> cpuCgroup = cgroups::cpu ::cgroup (pid);
2421
+ if (cpuCgroup .isError ()) {
2271
2422
return Error (
2272
- " Failed to determine cgroup for the 'memory ' subsystem: " +
2273
- memCgroup .error ());
2274
- } else if (memCgroup .isNone ()) {
2275
- return Error (" Unable to find 'memory ' cgroup subsystem" );
2276
- } else if (memCgroup .get () == systemRootCgroup) {
2423
+ " Failed to determine cgroup for the 'cpu ' subsystem: " +
2424
+ cpuCgroup .error ());
2425
+ } else if (cpuCgroup .isNone ()) {
2426
+ return Error (" Unable to find 'cpu ' cgroup subsystem" );
2427
+ } else if (cpuCgroup .get () == systemRootCgroup) {
2277
2428
return Error (
2278
2429
" Process '" + stringify (pid) +
2279
2430
" ' should not be in the system root cgroup (being destroyed?)" );
2280
2431
}
2281
2432
2282
- const Try<cgroups::cpuacct::Stats> cpuAcctStat =
2283
- cgroups::cpuacct:: stat (cpuacctHierarchy .get (), cpuacctCgroup .get ());
2433
+ const Try<hashmap<string, uint64_t >> stat =
2434
+ cgroups::stat (cpuHierarchy .get (), cpuCgroup .get (), " cpu.stat " );
2284
2435
2285
- if (cpuAcctStat .isError ()) {
2286
- return Error (" Failed to get cpu.stat: " + cpuAcctStat .error ());
2436
+ if (stat .isError ()) {
2437
+ return Error (" Failed to read cpu.stat: " + stat .error ());
2287
2438
}
2288
2439
2289
- const Try<hashmap<string, uint64_t >> memStats =
2290
- cgroups::stat (memHierarchy.get (), memCgroup.get (), " memory.stat" );
2291
-
2292
- if (memStats.isError ()) {
2293
- return Error (
2294
- " Error getting memory statistics from cgroups memory subsystem: " +
2295
- memStats.error ());
2440
+ Option<uint64_t > nr_periods = stat->get (" nr_periods" );
2441
+ if (nr_periods.isSome ()) {
2442
+ result.set_cpus_nr_periods (nr_periods.get ());
2296
2443
}
2297
2444
2298
- if (!memStats->contains (" rss" )) {
2299
- return Error (" cgroups memory stats does not contain 'rss' data" );
2445
+ Option<uint64_t > nr_throttled = stat->get (" nr_throttled" );
2446
+ if (nr_throttled.isSome ()) {
2447
+ result.set_cpus_nr_throttled (nr_throttled.get ());
2300
2448
}
2301
2449
2302
- result.set_timestamp (Clock::now ().secs ());
2303
- result.set_cpus_system_time_secs (cpuAcctStat->system .secs ());
2304
- result.set_cpus_user_time_secs (cpuAcctStat->user .secs ());
2305
- result.set_mem_rss_bytes (memStats->at (" rss" ));
2306
-
2307
- // Add the cpu.stat information only if CFS is enabled.
2308
- if (flags.cgroups_enable_cfs ) {
2309
- static const Result<string> cpuHierarchy = cgroups::hierarchy (" cpu" );
2310
-
2311
- if (cpuHierarchy.isError ()) {
2312
- return Error (
2313
- " Failed to determine the cgroup 'cpu' subsystem hierarchy: " +
2314
- cpuHierarchy.error ());
2315
- }
2316
-
2317
- const Result<string> cpuCgroup = cgroups::cpu::cgroup (pid);
2318
- if (cpuCgroup.isError ()) {
2319
- return Error (
2320
- " Failed to determine cgroup for the 'cpu' subsystem: " +
2321
- cpuCgroup.error ());
2322
- } else if (cpuCgroup.isNone ()) {
2323
- return Error (" Unable to find 'cpu' cgroup subsystem" );
2324
- } else if (cpuCgroup.get () == systemRootCgroup) {
2325
- return Error (
2326
- " Process '" + stringify (pid) +
2327
- " ' should not be in the system root cgroup (being destroyed?)" );
2328
- }
2329
-
2330
- const Try<hashmap<string, uint64_t >> stat =
2331
- cgroups::stat (cpuHierarchy.get (), cpuCgroup.get (), " cpu.stat" );
2332
-
2333
- if (stat.isError ()) {
2334
- return Error (" Failed to read cpu.stat: " + stat.error ());
2335
- }
2336
-
2337
- Option<uint64_t > nr_periods = stat->get (" nr_periods" );
2338
- if (nr_periods.isSome ()) {
2339
- result.set_cpus_nr_periods (nr_periods.get ());
2340
- }
2341
-
2342
- Option<uint64_t > nr_throttled = stat->get (" nr_throttled" );
2343
- if (nr_throttled.isSome ()) {
2344
- result.set_cpus_nr_throttled (nr_throttled.get ());
2345
- }
2346
-
2347
- Option<uint64_t > throttled_time = stat->get (" throttled_time" );
2348
- if (throttled_time.isSome ()) {
2349
- result.set_cpus_throttled_time_secs (
2350
- Nanoseconds (throttled_time.get ()).secs ());
2351
- }
2450
+ Option<uint64_t > throttled_time = stat->get (" throttled_time" );
2451
+ if (throttled_time.isSome ()) {
2452
+ result.set_cpus_throttled_time_secs (
2453
+ Nanoseconds (throttled_time.get ()).secs ());
2352
2454
}
2353
2455
}
2354
2456
0 commit comments