Skip to content

Commit 985ed47

Browse files
pzhan9facebook-github-bot
authored andcommitted
Add more logs for server (#492)
Summary: Pull Request resolved: #492 helpful to debug Reviewed By: shayne-fletcher Differential Revision: D78116522 fbshipit-source-id: 9889b1c4d01cf3bae95b40f9cb1a4b5e75521b40
1 parent 1eae6f6 commit 985ed47

File tree

7 files changed

+18
-15
lines changed

7 files changed

+18
-15
lines changed

hyperactor/src/channel/net.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -751,7 +751,8 @@ impl<M: RemoteMessage> Rx<M> for NetRx<M> {
751751

752752
impl<M: RemoteMessage> Drop for NetRx<M> {
753753
fn drop(&mut self) {
754-
self.2.stop();
754+
self.2
755+
.stop(&format!("NetRx dropped; channel address: {}", self.1));
755756
}
756757
}
757758

@@ -780,12 +781,13 @@ impl ServerHandle {
780781
/// incoming connection requests, and drain pending operations
781782
/// on active connections. After draining is completed, the
782783
/// connections are closed.
783-
pub fn stop(&self) {
784+
fn stop(&self, reason: &str) {
785+
tracing::info!("stopping server: {}; reason: {}", self, reason);
784786
self.cancel_token.cancel();
785787
}
786788

787789
/// Reference to the channel that was used to start the server.
788-
pub fn local_channel_addr(&self) -> &ChannelAddr {
790+
fn local_channel_addr(&self) -> &ChannelAddr {
789791
&self.channel_addr
790792
}
791793
}
@@ -1145,7 +1147,7 @@ where
11451147
let _ = tracing::info_span!("channel_listen_accept_loop");
11461148
tokio::select! {
11471149
result = listener.accept() => {
1148-
tracing::debug!("listener accepted a new connection.");
1150+
tracing::debug!("listener accepted a new connection to {}", listener_channel_addr);
11491151
match result {
11501152
Ok((stream, addr)) => {
11511153
let tx = tx.clone();
@@ -1180,13 +1182,13 @@ where
11801182
});
11811183
}
11821184
Err(err) => {
1183-
tracing::info!("serve {}: accept error: {}", listener_channel_addr,err)
1185+
tracing::info!("serve {}: accept error: {}", listener_channel_addr, err)
11841186
}
11851187
}
11861188
}
11871189

11881190
_ = parent_cancel_token.cancelled() => {
1189-
tracing::info!("received parent token cancellation");
1191+
tracing::info!("serve {}: received parent token cancellation", listener_channel_addr);
11901192
break Ok(());
11911193
}
11921194

@@ -1962,7 +1964,7 @@ mod tests {
19621964
RealClock.sleep(Duration::from_secs(5)).await;
19631965

19641966
// Stop the server.
1965-
rx1.2.stop();
1967+
rx1.2.stop("from testing");
19661968
assert_matches!(rx1.recv().await.unwrap_err(), ChannelError::Closed);
19671969

19681970
// Send the message is allowed even when the server is down.

hyperactor/src/mailbox.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -809,7 +809,8 @@ impl MailboxServerHandle {
809809
/// join the handle by awaiting the [`MailboxServerHandle`] future.
810810
///
811811
/// Stop should be called at most once.
812-
pub fn stop(&self) {
812+
pub fn stop(&self, reason: &str) {
813+
tracing::info!("stopping mailbox server; reason: {}", reason);
813814
self.stopped_tx.send(true).expect("stop called twice");
814815
}
815816
}
@@ -2554,7 +2555,7 @@ mod tests {
25542555
.serialize_and_send_once(port, 123u64, monitored_return_handle())
25552556
.unwrap();
25562557
assert_eq!(receiver.recv().await.unwrap(), 123u64);
2557-
serve_handle.stop();
2558+
serve_handle.stop("fromt test");
25582559
serve_handle.await.unwrap().unwrap();
25592560
}
25602561

@@ -2595,7 +2596,7 @@ mod tests {
25952596
.serialize_and_send_once(port, msg, monitored_return_handle())
25962597
.unwrap();
25972598
assert_eq!(receiver.recv().await.unwrap(), msg);
2598-
serve_handle.stop();
2599+
serve_handle.stop("from test");
25992600
serve_handle.await.unwrap().unwrap();
26002601
}
26012602

hyperactor_mesh/src/alloc/local.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ impl Alloc for LocalAlloc {
220220
};
221221

222222
// Stop serving the mailbox.
223-
proc_to_stop.handle.stop();
223+
proc_to_stop.handle.stop("received Action::Stop");
224224

225225
if let Err(err) = proc_to_stop
226226
.proc

hyperactor_mesh/src/alloc/remoteprocess.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,7 @@ impl RemoteProcessAllocator {
371371
)
372372
.await;
373373

374-
mailbox_handle.stop();
374+
mailbox_handle.stop("alloc stopped");
375375
if let Err(e) = mailbox_handle.await {
376376
tracing::error!("failed to join forwarder: {}", e);
377377
}

hyperactor_multiprocess/src/proc_actor.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -488,7 +488,7 @@ impl ProcActor {
488488
/// Shutdown the mailbox server to free up rx and its cooresponding listen address.
489489
/// Because in the next bootstrap attempt, the same listen address will be used.
490490
async fn failed_proc_bootstrap_cleanup(mailbox_handle: MailboxServerHandle) {
491-
mailbox_handle.stop();
491+
mailbox_handle.stop("failed proc bootstrap cleanup");
492492
if let Err(shutdown_err) = mailbox_handle.await {
493493
// Ignore the shutdown error and populate the original error.
494494
tracing::error!(

hyperactor_multiprocess/src/system.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ impl ServerHandle {
145145
pub async fn stop(&self) -> Result<(), ActorError> {
146146
// TODO: this needn't be async
147147
self.actor_handle.drain_and_stop()?;
148-
self.mailbox_handle.stop();
148+
self.mailbox_handle.stop("system server stopped");
149149
Ok(())
150150
}
151151

hyperactor_multiprocess/src/system_actor.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2722,7 +2722,7 @@ mod tests {
27222722

27232723
// Now kill pong's mailbox server making message delivery
27242724
// between procs impossible.
2725-
proc_actor_1.mailbox.stop();
2725+
proc_actor_1.mailbox.stop("from testing");
27262726
proc_actor_1.mailbox.await.unwrap().unwrap();
27272727

27282728
// That in itself shouldn't be a problem. Check the world

0 commit comments

Comments
 (0)