Skip to content

Commit 7d9f2e2

Browse files
authored
Merge pull request #1116 from rust-lang/more-tracking
Increase tracking of running Docker containers
2 parents 9c9985f + 337628a commit 7d9f2e2

File tree

2 files changed

+47
-4
lines changed

2 files changed

+47
-4
lines changed

compiler/base/orchestrator/src/coordinator.rs

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@ use futures::{
66
use serde::Deserialize;
77
use snafu::prelude::*;
88
use std::{
9-
collections::HashMap,
9+
collections::{BTreeSet, HashMap},
1010
fmt, mem, ops,
1111
process::Stdio,
1212
sync::{
1313
atomic::{AtomicU64, Ordering},
14-
Arc,
14+
Arc, LazyLock, Mutex,
1515
},
1616
time::Duration,
1717
};
@@ -2532,11 +2532,23 @@ pub enum CommanderError {
25322532
WorkerOperationFailed { source: SerializedError2 },
25332533
}
25342534

2535+
/// Process-wide set of the names of containers that currently have a live
/// `TerminateContainer` guard.
///
/// `TerminateContainer::new` inserts a name and `TerminateContainer::stop_tracking`
/// removes it again. The set is ordered (`BTreeSet`), so iterating it yields
/// the names in sorted order.
pub static TRACKED_CONTAINERS: LazyLock<Mutex<BTreeSet<Arc<str>>>> =
2536+
LazyLock::new(Default::default);
2537+
25352538
/// Pairs a container's name with the command that is run to kill it.
///
/// The inner `Option` is `take`n when the container is terminated, so `None`
/// means there is nothing left to kill.
#[derive(Debug)]
25362539
pub struct TerminateContainer(Option<(String, Command)>);
25372540

25382541
impl TerminateContainer {
25392542
pub fn new(name: String, command: Command) -> Self {
2543+
let was_inserted = TRACKED_CONTAINERS
2544+
.lock()
2545+
.unwrap_or_else(|e| e.into_inner())
2546+
.insert(name.clone().into());
2547+
2548+
if !was_inserted {
2549+
error!(%name, "This container was already tracked; duplicates are bad logic");
2550+
}
2551+
25402552
Self(Some((name, command)))
25412553
}
25422554

@@ -2548,6 +2560,7 @@ impl TerminateContainer {
25482560
use terminate_container_error::*;
25492561

25502562
if let Some((name, mut kill_child)) = self.0.take() {
2563+
Self::stop_tracking(&name);
25512564
let o = kill_child
25522565
.output()
25532566
.await
@@ -2558,6 +2571,16 @@ impl TerminateContainer {
25582571
Ok(())
25592572
}
25602573

2574+
fn stop_tracking(name: &str) {
2575+
let was_tracked = TRACKED_CONTAINERS
2576+
.lock()
2577+
.unwrap_or_else(|e| e.into_inner())
2578+
.remove(name);
2579+
if !was_tracked {
2580+
error!(%name, "Container was not in the tracking set");
2581+
}
2582+
}
2583+
25612584
fn report_failure(name: String, s: std::process::Output) {
25622585
// We generally don't care if the command itself succeeds or
25632586
// not; the container may already be dead! However, let's log
@@ -2570,6 +2593,9 @@ impl TerminateContainer {
25702593
let stdout = String::from_utf8_lossy(&s.stdout);
25712594
let stderr = String::from_utf8_lossy(&s.stderr);
25722595

2596+
let stdout = stdout.trim();
2597+
let stderr = stderr.trim();
2598+
25732599
error!(?code, %stdout, %stderr, %name, "Killing the container failed");
25742600
}
25752601
}
@@ -2578,6 +2604,7 @@ impl TerminateContainer {
25782604
impl Drop for TerminateContainer {
25792605
fn drop(&mut self) {
25802606
if let Some((name, mut kill_child)) = self.0.take() {
2607+
Self::stop_tracking(&name);
25812608
match kill_child.as_std_mut().output() {
25822609
Ok(o) => Self::report_failure(name, o),
25832610
Err(e) => error!("Unable to kill container {name} while dropping: {e}"),

ui/src/server_axum.rs

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@ use axum_extra::{
2626
TypedHeader,
2727
};
2828
use futures::{future::BoxFuture, FutureExt};
29-
use orchestrator::coordinator::{self, CoordinatorFactory, DockerBackend, Versions};
29+
use orchestrator::coordinator::{
30+
self, CoordinatorFactory, DockerBackend, Versions, TRACKED_CONTAINERS,
31+
};
3032
use snafu::prelude::*;
3133
use std::{
3234
convert::TryInto,
@@ -100,7 +102,11 @@ pub(crate) async fn serve(config: Config) {
100102
.route("/metrics", get(metrics))
101103
.route("/websocket", get(websocket))
102104
.route("/nowebsocket", post(nowebsocket))
103-
.route("/whynowebsocket", get(whynowebsocket))
105+
.route("/internal/debug/whynowebsocket", get(whynowebsocket))
106+
.route(
107+
"/internal/debug/tracked-containers",
108+
get(tracked_containers),
109+
)
104110
.layer(Extension(factory))
105111
.layer(Extension(db_handle))
106112
.layer(Extension(Arc::new(SandboxCache::default())))
@@ -680,6 +686,16 @@ async fn whynowebsocket() -> String {
680686
format!("{:#?}", WS_ERRORS.lock().unwrap_or_else(|e| e.into_inner()))
681687
}
682688

689+
async fn tracked_containers() -> String {
690+
let tracked_containers = TRACKED_CONTAINERS
691+
.lock()
692+
.unwrap_or_else(|e| e.into_inner())
693+
.clone();
694+
tracked_containers
695+
.iter()
696+
.fold(String::new(), |a, s| a + s + "\n")
697+
}
698+
683699
#[derive(Debug)]
684700
struct MetricsAuthorization;
685701

0 commit comments

Comments
 (0)