@@ -14,7 +14,8 @@ use crate::module_host_context::ModuleCreationContext;
14
14
use crate :: replica_context:: ReplicaContext ;
15
15
use crate :: subscription:: module_subscription_actor:: ModuleSubscriptions ;
16
16
use crate :: subscription:: module_subscription_manager:: SubscriptionManager ;
17
- use crate :: util:: { asyncify, spawn_rayon} ;
17
+ use crate :: util:: asyncify;
18
+ use crate :: util:: jobs:: { JobCore , JobCores } ;
18
19
use crate :: worker_metrics:: WORKER_METRICS ;
19
20
use anyhow:: { anyhow, ensure, Context } ;
20
21
use async_trait:: async_trait;
@@ -95,6 +96,8 @@ pub struct HostController {
95
96
pub page_pool : PagePool ,
96
97
/// The runtimes for running our modules.
97
98
runtimes : Arc < HostRuntimes > ,
99
+ /// The CPU cores that are reserved for ModuleHost operations to run on.
100
+ db_cores : JobCores ,
98
101
}
99
102
100
103
struct HostRuntimes {
@@ -169,6 +172,7 @@ impl HostController {
169
172
program_storage : ProgramStorage ,
170
173
energy_monitor : Arc < impl EnergyMonitor > ,
171
174
durability : Arc < dyn DurabilityProvider > ,
175
+ db_cores : JobCores ,
172
176
) -> Self {
173
177
Self {
174
178
hosts : <_ >:: default ( ) ,
@@ -179,6 +183,7 @@ impl HostController {
179
183
runtimes : HostRuntimes :: new ( Some ( & data_dir) ) ,
180
184
data_dir,
181
185
page_pool : PagePool :: new ( default_config. page_pool_max_size ) ,
186
+ db_cores,
182
187
}
183
188
}
184
189
@@ -267,7 +272,19 @@ impl HostController {
267
272
/// This is not necessary during hotswap publishes,
268
273
/// as the automigration planner and executor accomplish the same validity checks.
269
274
pub async fn check_module_validity ( & self , database : Database , program : Program ) -> anyhow:: Result < Arc < ModuleInfo > > {
270
- Host :: try_init_in_memory_to_check ( & self . runtimes , self . page_pool . clone ( ) , database, program) . await
275
+ Host :: try_init_in_memory_to_check (
276
+ & self . runtimes ,
277
+ self . page_pool . clone ( ) ,
278
+ database,
279
+ program,
280
+ // This takes a db core to check validity, and we will later take
281
+ // another core to actually run the module. Due to the round-robin
282
+ // algorithm that JobCores uses, that will likely just be the same
283
+ // core - there's not a concern that we'll only end up using 1/2
284
+ // of the actual cores.
285
+ self . db_cores . take ( ) ,
286
+ )
287
+ . await
271
288
}
272
289
273
290
/// Run a computation on the [`RelationalDB`] of a [`ModuleHost`] managed by
@@ -338,6 +355,7 @@ impl HostController {
338
355
program,
339
356
self . energy_monitor . clone ( ) ,
340
357
self . unregister_fn ( replica_id) ,
358
+ self . db_cores . take ( ) ,
341
359
)
342
360
. await ?;
343
361
@@ -415,6 +433,7 @@ impl HostController {
415
433
program,
416
434
self . energy_monitor . clone ( ) ,
417
435
self . unregister_fn ( replica_id) ,
436
+ self . db_cores . take ( ) ,
418
437
)
419
438
. await ?;
420
439
match update_result {
@@ -556,6 +575,7 @@ async fn make_replica_ctx(
556
575
557
576
/// Initialize a module host for the given program.
558
577
/// The passed replica_ctx may not be configured for this version of the program's database schema yet.
578
+ #[ allow( clippy:: too_many_arguments) ]
559
579
async fn make_module_host (
560
580
runtimes : Arc < HostRuntimes > ,
561
581
host_type : HostType ,
@@ -564,8 +584,14 @@ async fn make_module_host(
564
584
program : Program ,
565
585
energy_monitor : Arc < dyn EnergyMonitor > ,
566
586
unregister : impl Fn ( ) + Send + Sync + ' static ,
587
+ core : JobCore ,
567
588
) -> anyhow:: Result < ( Program , ModuleHost ) > {
568
- spawn_rayon ( move || {
589
+ // `make_actor` is blocking, as it needs to compile the wasm to native code,
590
+ // which may be computationally expensive - sometimes up to 1s for a large module.
591
+ // TODO: change back to using `spawn_rayon` here - asyncify runs on tokio blocking
592
+ // threads, but those aren't for computation. Also, wasmtime uses rayon
593
+ // to run compilation in parallel, so it'll need to run stuff in rayon anyway.
594
+ asyncify ( move || {
569
595
let module_host = match host_type {
570
596
HostType :: Wasm => {
571
597
let mcc = ModuleCreationContext {
@@ -577,7 +603,7 @@ async fn make_module_host(
577
603
let start = Instant :: now ( ) ;
578
604
let actor = runtimes. wasmtime . make_actor ( mcc) ?;
579
605
trace ! ( "wasmtime::make_actor blocked for {:?}" , start. elapsed( ) ) ;
580
- ModuleHost :: new ( actor, unregister)
606
+ ModuleHost :: new ( actor, unregister, core )
581
607
}
582
608
} ;
583
609
Ok ( ( program, module_host) )
@@ -610,6 +636,7 @@ async fn launch_module(
610
636
energy_monitor : Arc < dyn EnergyMonitor > ,
611
637
replica_dir : ReplicaDir ,
612
638
runtimes : Arc < HostRuntimes > ,
639
+ core : JobCore ,
613
640
) -> anyhow:: Result < ( Program , LaunchedModule ) > {
614
641
let db_identity = database. database_identity ;
615
642
let host_type = database. host_type ;
@@ -626,6 +653,7 @@ async fn launch_module(
626
653
program,
627
654
energy_monitor. clone ( ) ,
628
655
on_panic,
656
+ core,
629
657
)
630
658
. await ?;
631
659
@@ -776,6 +804,7 @@ impl Host {
776
804
energy_monitor. clone ( ) ,
777
805
replica_dir,
778
806
runtimes. clone ( ) ,
807
+ host_controller. db_cores . take ( ) ,
779
808
)
780
809
. await ?;
781
810
@@ -834,6 +863,7 @@ impl Host {
834
863
page_pool : PagePool ,
835
864
database : Database ,
836
865
program : Program ,
866
+ core : JobCore ,
837
867
) -> anyhow:: Result < Arc < ModuleInfo > > {
838
868
// Even in-memory databases acquire a lockfile.
839
869
// Grab a tempdir to put that lockfile in.
@@ -865,6 +895,7 @@ impl Host {
865
895
Arc :: new ( NullEnergyMonitor ) ,
866
896
phony_replica_dir,
867
897
runtimes. clone ( ) ,
898
+ core,
868
899
)
869
900
. await ?;
870
901
@@ -895,6 +926,7 @@ impl Host {
895
926
program : Program ,
896
927
energy_monitor : Arc < dyn EnergyMonitor > ,
897
928
on_panic : impl Fn ( ) + Send + Sync + ' static ,
929
+ core : JobCore ,
898
930
) -> anyhow:: Result < UpdateDatabaseResult > {
899
931
let replica_ctx = & self . replica_ctx ;
900
932
let ( scheduler, scheduler_starter) = Scheduler :: open ( self . replica_ctx . relational_db . clone ( ) ) ;
@@ -907,6 +939,7 @@ impl Host {
907
939
program,
908
940
energy_monitor,
909
941
on_panic,
942
+ core,
910
943
)
911
944
. await ?;
912
945
@@ -981,10 +1014,15 @@ pub async fn extract_schema(program_bytes: Box<[u8]>, host_type: HostType) -> an
981
1014
982
1015
let runtimes = HostRuntimes :: new ( None ) ;
983
1016
let page_pool = PagePool :: new ( None ) ;
984
- let module_info = Host :: try_init_in_memory_to_check ( & runtimes, page_pool, database, program) . await ?;
985
- let module_info = Arc :: into_inner ( module_info) . unwrap ( ) ;
1017
+ let core = JobCore :: default ( ) ;
1018
+ let module_info = Host :: try_init_in_memory_to_check ( & runtimes, page_pool, database, program, core) . await ?;
1019
+ // Unwrapping the Arc should always succeed, but occasionally other strong references still exist; in that case fall back to cloning the module_def.
1020
+ let module_def = match Arc :: try_unwrap ( module_info) {
1021
+ Ok ( info) => info. module_def ,
1022
+ Err ( info) => info. module_def . clone ( ) ,
1023
+ } ;
986
1024
987
- Ok ( module_info . module_def )
1025
+ Ok ( module_def)
988
1026
}
989
1027
990
1028
// Remove all gauges associated with a database.
0 commit comments