@@ -29,7 +29,6 @@ use hyperactor::channel::ChannelTx;
29
29
use hyperactor:: channel:: Rx ;
30
30
use hyperactor:: channel:: Tx ;
31
31
use hyperactor:: channel:: TxStatus ;
32
- use hyperactor:: id;
33
32
use hyperactor:: sync:: flag;
34
33
use hyperactor:: sync:: monitor;
35
34
use hyperactor_state:: state_actor:: StateActor ;
@@ -51,6 +50,8 @@ use crate::bootstrap;
51
50
use crate :: bootstrap:: Allocator2Process ;
52
51
use crate :: bootstrap:: Process2Allocator ;
53
52
use crate :: bootstrap:: Process2AllocatorMessage ;
53
+ use crate :: log_source:: LogSource ;
54
+ use crate :: log_source:: StateServerInfo ;
54
55
use crate :: shortuuid:: ShortUuid ;
55
56
56
57
/// The maximum number of trailing log lines to keep for managed processes.
@@ -87,6 +88,9 @@ impl Allocator for ProcessAllocator {
87
88
let ( bootstrap_addr, rx) = channel:: serve ( ChannelAddr :: any ( ChannelTransport :: Unix ) )
88
89
. await
89
90
. map_err ( anyhow:: Error :: from) ?;
91
+ let log_source = LogSource :: new_with_local_actor ( )
92
+ . await
93
+ . map_err ( AllocatorError :: from) ?;
90
94
91
95
let name = ShortUuid :: generate ( ) ;
92
96
let n = spec. shape . slice ( ) . len ( ) ;
@@ -95,6 +99,7 @@ impl Allocator for ProcessAllocator {
95
99
world_id : WorldId ( name. to_string ( ) ) ,
96
100
spec : spec. clone ( ) ,
97
101
bootstrap_addr,
102
+ log_source,
98
103
rx,
99
104
index : 0 ,
100
105
active : HashMap :: new ( ) ,
@@ -113,6 +118,7 @@ pub struct ProcessAlloc {
113
118
world_id : WorldId , // to provide storage
114
119
spec : AllocSpec ,
115
120
bootstrap_addr : ChannelAddr ,
121
+ log_source : LogSource ,
116
122
rx : channel:: ChannelRx < Process2Allocator > ,
117
123
index : usize ,
118
124
active : HashMap < usize , Child > ,
@@ -143,13 +149,14 @@ struct Child {
143
149
impl Child {
144
150
fn monitored (
145
151
mut process : tokio:: process:: Child ,
152
+ state_server_info : StateServerInfo ,
146
153
) -> ( Self , impl Future < Output = ProcStopReason > ) {
147
154
let ( group, handle) = monitor:: group ( ) ;
148
155
let ( exit_flag, exit_guard) = flag:: guarded ( ) ;
149
156
let stop_reason = Arc :: new ( OnceLock :: new ( ) ) ;
150
157
151
158
// TODO(lky): the state actor branch is now enabled; remove this flag
152
- let use_state_actor = false ;
159
+ let use_state_actor = true ;
153
160
154
161
// Set up stdout and stderr writers
155
162
let mut stdout_tee: Box < dyn io:: AsyncWrite + Send + Unpin + ' static > =
@@ -159,24 +166,20 @@ impl Child {
159
166
160
167
// If state actor is enabled, try to set up LogWriter instances
161
168
if use_state_actor {
162
- let state_actor_ref = ActorRef :: < StateActor > :: attest ( id ! ( state_server[ 0 ] . state[ 0 ] ) ) ;
163
- // Parse the state actor address
164
- if let Ok ( state_actor_addr) = "tcp![::]:3000" . parse :: < ChannelAddr > ( ) {
165
- // Use the helper function to create both writers at once
166
- match hyperactor_state:: log_writer:: create_log_writers (
167
- state_actor_addr,
168
- state_actor_ref,
169
- ) {
170
- Ok ( ( stdout_writer, stderr_writer) ) => {
171
- stdout_tee = stdout_writer;
172
- stderr_tee = stderr_writer;
173
- }
174
- Err ( e) => {
175
- tracing:: error!( "failed to create log writers: {}" , e) ;
176
- }
169
+ let state_actor_ref = ActorRef :: < StateActor > :: attest ( state_server_info. state_actor_id ) ;
170
+ let state_actor_addr = state_server_info. state_proc_addr ;
171
+ // Use the helper function to create both writers at once
172
+ match hyperactor_state:: log_writer:: create_log_writers (
173
+ state_actor_addr,
174
+ state_actor_ref,
175
+ ) {
176
+ Ok ( ( stdout_writer, stderr_writer) ) => {
177
+ stdout_tee = stdout_writer;
178
+ stderr_tee = stderr_writer;
179
+ }
180
+ Err ( e) => {
181
+ tracing:: error!( "failed to create log writers: {}" , e) ;
177
182
}
178
- } else {
179
- tracing:: error!( "failed to parse state actor address" ) ;
180
183
}
181
184
}
182
185
@@ -389,7 +392,8 @@ impl ProcessAlloc {
389
392
None
390
393
}
391
394
Ok ( rank) => {
392
- let ( handle, monitor) = Child :: monitored ( process) ;
395
+ let ( handle, monitor) =
396
+ Child :: monitored ( process, self . log_source . server_info ( ) ) ;
393
397
self . children . spawn ( async move { ( index, monitor. await ) } ) ;
394
398
self . active . insert ( index, handle) ;
395
399
// Adjust for shape slice offset for non-zero shapes (sub-shapes).
@@ -493,6 +497,10 @@ impl Alloc for ProcessAlloc {
493
497
ChannelTransport :: Unix
494
498
}
495
499
500
/// Returns a clone of the `LogSource` stored in this allocation's `log_source` field.
/// This implementation always yields `Ok`; it has no error path of its own.
+ async fn log_source ( & self ) -> Result < LogSource , AllocatorError > {
501
+ Ok ( self . log_source . clone ( ) )
502
+ }
503
+
496
504
async fn stop ( & mut self ) -> Result < ( ) , AllocatorError > {
497
505
// We rely on the teardown here, and that the process should
498
506
// exit on its own. We should have a hard timeout here as well,
0 commit comments