@@ -20,7 +20,6 @@ string_enum!(pub enum Status {
20
20
Queued => "queued" ,
21
21
Running => "running" ,
22
22
NeedsReport => "needs-report" ,
23
- Failed => "failed" ,
24
23
GeneratingReport => "generating-report" ,
25
24
ReportFailed => "report-failed" ,
26
25
Completed => "completed" ,
@@ -282,10 +281,9 @@ impl Experiment {
282
281
283
282
pub fn run_by ( db : & Database , assignee : & Assignee ) -> Fallible < Option < Experiment > > {
284
283
let record = db. get_row (
285
- "select * from experiments where name in ( \
286
- select experiment from experiment_crates \
287
- where status = ?2 and skipped = 0 and assigned_to = ?1 and \
288
- experiment in (select name from experiments where status = ?2)) \
284
+ "select * from experiments where name = (
285
+ select latest_work_for from agents where ('agent:' || agents.name) = ?1
286
+ ) and status = ?2 \
289
287
limit 1",
290
288
& [ & assignee. to_string ( ) , Status :: Running . to_str ( ) ] ,
291
289
|r| ExperimentDBRecord :: from_row ( r) ,
@@ -357,7 +355,7 @@ impl Experiment {
357
355
}
358
356
359
357
/// Looks up the next experiment for `assignee` with `find_next` and passes the
/// result (which may be `None`) on to `assign_experiment`; an error from
/// either step is returned to the caller.
// NOTE(review): the `bool` in the result presumably reports whether the
// experiment was newly moved to `Running` (cf. `new_ex` in
// `assign_experiment`) — confirm, the return statement is outside this hunk.
pub fn next ( db : & Database , assignee : & Assignee ) -> Fallible < Option < ( bool , Experiment ) > > {
360
- Self :: find_next ( db, assignee) . and_then ( |ex| Self :: assign_experiment ( db, ex) )
358
+ Self :: find_next ( db, assignee) . and_then ( |ex| Self :: assign_experiment ( db, ex, assignee ) )
361
359
}
362
360
pub fn has_next ( db : & Database , assignee : & Assignee ) -> Fallible < bool > {
363
361
Ok ( Self :: find_next ( db, assignee) ?. is_some ( ) )
@@ -366,8 +364,16 @@ impl Experiment {
366
364
fn assign_experiment (
367
365
db : & Database ,
368
366
ex : Option < Experiment > ,
367
+ agent : & Assignee ,
369
368
) -> Fallible < Option < ( bool , Experiment ) > > {
370
369
if let Some ( mut experiment) = ex {
370
+ if let Assignee :: Agent ( name) = agent {
371
+ db. execute (
372
+ "update agents set latest_work_for = ?2 where agents.name = ?1;" ,
373
+ rusqlite:: params![ & name, & experiment. name] ,
374
+ ) ?;
375
+ }
376
+
371
377
let new_ex = experiment. status != Status :: Running ;
372
378
if new_ex {
373
379
experiment. set_status ( db, Status :: Running ) ?;
@@ -405,19 +411,12 @@ impl Experiment {
405
411
const AGENT_QUERY : & str = r#"
406
412
SELECT *
407
413
FROM experiments ex
408
- WHERE ( ex.status = "queued"
409
- OR ( status = "running"
410
- AND ( SELECT COUNT (*)
411
- FROM experiment_crates ex_crates
412
- WHERE ex_crates.experiment = ex.name
413
- AND ( status = "queued")
414
- AND ( skipped = 0)
415
- > 0 ) ) )
416
- AND ( ex.assigned_to = ?1 )
417
- AND ( ex.requirement IS NULL
418
- OR ex.requirement IN ( SELECT capability
419
- FROM agent_capabilities
420
- WHERE agent_name = ?2) )
414
+ WHERE (ex.status = "queued" OR status = "running")
415
+ AND ( ex.assigned_to = ?1 )
416
+ AND ( ex.requirement IS NULL
417
+ OR ex.requirement IN (SELECT capability
418
+ FROM agent_capabilities
419
+ WHERE agent_name = ?2) )
421
420
ORDER BY ex.priority DESC,
422
421
ex.created_at
423
422
LIMIT 1;
@@ -434,15 +433,8 @@ impl Experiment {
434
433
const CLI_QUERY : & str = r#"
435
434
SELECT *
436
435
FROM experiments ex
437
- WHERE ( ex.status = "queued"
438
- OR ( status = "running"
439
- AND ( SELECT COUNT (*)
440
- FROM experiment_crates ex_crates
441
- WHERE ex_crates.experiment = ex.name
442
- AND ( status = "queued")
443
- AND ( skipped = 0)
444
- > 0 ) ) )
445
- AND ( ex.assigned_to IS NULL OR ex.assigned_to = ?1 )
436
+ WHERE (ex.status = "queued" OR status = "running")
437
+ AND (ex.assigned_to IS NULL OR ex.assigned_to = ?1)
446
438
ORDER BY ex.assigned_to IS NULL,
447
439
ex.priority DESC,
448
440
ex.created_at
@@ -456,14 +448,7 @@ impl Experiment {
456
448
const AGENT_UNASSIGNED_QUERY : & str = r#"
457
449
SELECT *
458
450
FROM experiments ex
459
- WHERE ( ex.status = "queued"
460
- OR ( status = "running"
461
- AND ( SELECT COUNT (*)
462
- FROM experiment_crates ex_crates
463
- WHERE ex_crates.experiment = ex.name
464
- AND ( status = "queued")
465
- AND ( skipped = 0)
466
- > 0 ) ) )
451
+ WHERE (ex.status = "queued" OR status = "running")
467
452
AND ( ex.assigned_to IS NULL )
468
453
AND ( ex.requirement IS NULL
469
454
OR ex.requirement IN ( SELECT capability
@@ -500,26 +485,6 @@ impl Experiment {
500
485
}
501
486
}
502
487
503
- pub fn clear_agent_progress ( & mut self , db : & Database , agent : & str ) -> Fallible < ( ) > {
504
- // Mark all the running crates from this agent as queued (so that they
505
- // run again)
506
- db. execute (
507
- "
508
- UPDATE experiment_crates
509
- SET assigned_to = NULL, status = ?1 \
510
- WHERE experiment = ?2 AND status = ?3 \
511
- AND assigned_to = ?4
512
- " ,
513
- & [
514
- & Status :: Queued . to_string ( ) ,
515
- & self . name ,
516
- & Status :: Running . to_string ( ) ,
517
- & Assignee :: Agent ( agent. to_string ( ) ) . to_string ( ) ,
518
- ] ,
519
- ) ?;
520
- Ok ( ( ) )
521
- }
522
-
523
488
pub fn set_status ( & mut self , db : & Database , status : Status ) -> Fallible < ( ) > {
524
489
db. execute (
525
490
"UPDATE experiments SET status = ?1 WHERE name = ?2;" ,
@@ -538,29 +503,13 @@ impl Experiment {
538
503
self . started_at = Some ( now) ;
539
504
}
540
505
// Check if the old status was "running" and there is no completed date
541
- ( Status :: Running , new_status)
542
- if self . completed_at . is_none ( ) && new_status != Status :: Failed =>
543
- {
506
+ ( Status :: Running , _) if self . completed_at . is_none ( ) => {
544
507
db. execute (
545
508
"UPDATE experiments SET completed_at = ?1 WHERE name = ?2;" ,
546
509
& [ & now, & self . name . as_str ( ) ] ,
547
510
) ?;
548
511
self . completed_at = Some ( now) ;
549
512
}
550
- // Queue again failed crates
551
- ( Status :: Failed , Status :: Queued ) => {
552
- db. execute (
553
- "UPDATE experiment_crates
554
- SET status = ?1 \
555
- WHERE experiment = ?2 AND status = ?3
556
- " ,
557
- & [
558
- & Status :: Queued . to_string ( ) ,
559
- & self . name ,
560
- & Status :: Failed . to_string ( ) ,
561
- ] ,
562
- ) ?;
563
- }
564
513
_ => ( ) ,
565
514
}
566
515
@@ -655,41 +604,46 @@ impl Experiment {
655
604
}
656
605
}
657
606
658
- pub fn get_uncompleted_crates (
659
- & self ,
660
- db : & Database ,
661
- config : & Config ,
662
- assigned_to : & Assignee ,
663
- ) -> Fallible < Vec < Crate > > {
607
+ pub fn get_uncompleted_crates ( & self , db : & Database , config : & Config ) -> Fallible < Vec < Crate > > {
664
608
let limit = self . crate_list_size ( config) ;
665
- let assigned_to = assigned_to. to_string ( ) ;
609
+
610
+ #[ cfg( not( test) ) ]
611
+ const RUN_TIMEOUT : u32 = 20 ;
612
+ #[ cfg( test) ]
613
+ const RUN_TIMEOUT : u32 = 1 ;
666
614
667
615
db. transaction ( |transaction| {
668
616
//get the first 'limit' queued crates from the experiment crates list
669
617
let mut params: Vec < & dyn rusqlite:: types:: ToSql > = Vec :: new ( ) ;
670
618
let crates = transaction
671
619
. query (
672
- "SELECT crate FROM experiment_crates WHERE experiment = ?1
673
- AND status = ?2 AND skipped = 0 LIMIT ?3;" ,
674
- rusqlite:: params![ self . name, Status :: Queued . to_string( ) , limit] ,
620
+ & format ! (
621
+ "SELECT crate FROM experiment_crates WHERE experiment = ?1
622
+ AND skipped = 0
623
+ AND status = 'queued'
624
+ AND (started_at is null or started_at <= datetime('now', '-{} minutes'))
625
+ LIMIT ?2;" ,
626
+ RUN_TIMEOUT
627
+ ) ,
628
+ rusqlite:: params![ self . name, limit] ,
675
629
|r| r. get ( "crate" ) ,
676
630
) ?
677
631
. into_iter ( )
678
632
. collect :: < Vec < String > > ( ) ;
679
633
680
634
crates. iter ( ) . for_each ( |krate| params. push ( krate) ) ;
681
- let params_header: & [ & dyn rusqlite:: types:: ToSql ] = & [ & assigned_to , & self . name ] ;
635
+ let params_header: & [ & dyn rusqlite:: types:: ToSql ] = & [ & self . name ] ;
682
636
//SQLite cannot handle queries with more than 999 variables
683
637
for params in params. chunks ( SQL_VARIABLE_LIMIT ) {
684
638
let params = [ params_header, params] . concat ( ) ;
685
639
let update_query = & [
686
640
"
687
641
UPDATE experiment_crates
688
- SET assigned_to = ?1, status = \" running \" \
689
- WHERE experiment = ?2
642
+ SET started_at = datetime('now')
643
+ WHERE experiment = ?1
690
644
AND crate IN ("
691
645
. to_string ( ) ,
692
- "?," . repeat ( params. len ( ) - 3 ) ,
646
+ "?," . repeat ( params. len ( ) - 2 ) ,
693
647
"?)" . to_string ( ) ,
694
648
]
695
649
. join ( "" ) ;
@@ -703,26 +657,6 @@ impl Experiment {
703
657
. collect :: < Fallible < Vec < Crate > > > ( )
704
658
} )
705
659
}
706
-
707
- pub fn get_running_crates (
708
- & self ,
709
- db : & Database ,
710
- assigned_to : & Assignee ,
711
- ) -> Fallible < Vec < Crate > > {
712
- db. query (
713
- "SELECT crate FROM experiment_crates WHERE experiment = ?1 \
714
- AND status = ?2 AND assigned_to = ?3",
715
- & [
716
- & self . name ,
717
- & Status :: Running . to_string ( ) ,
718
- & assigned_to. to_string ( ) ,
719
- ] ,
720
- |r| r. get ( 0 ) ,
721
- ) ?
722
- . into_iter ( )
723
- . map ( |c : String | c. parse ( ) )
724
- . collect :: < Fallible < Vec < Crate > > > ( )
725
- }
726
660
}
727
661
728
662
struct ExperimentDBRecord {
@@ -1065,16 +999,12 @@ mod tests {
1065
999
// Create a dummy experiment
1066
1000
CreateExperiment :: dummy ( "dummy" ) . apply ( & ctx) . unwrap ( ) ;
1067
1001
let ex = Experiment :: get ( & db, "dummy" ) . unwrap ( ) . unwrap ( ) ;
1068
- let crates = ex
1069
- . get_uncompleted_crates ( & db, & config, & Assignee :: CLI )
1070
- . unwrap ( ) ;
1002
+ let crates = ex. get_uncompleted_crates ( & db, & config) . unwrap ( ) ;
1071
1003
// Assert the whole list is returned
1072
1004
assert_eq ! ( crates. len( ) , ex. get_crates( & db) . unwrap( ) . len( ) ) ;
1073
1005
1074
1006
// Test that already completed crates do not show up again
1075
- let uncompleted_crates = ex
1076
- . get_uncompleted_crates ( & db, & config, & Assignee :: CLI )
1077
- . unwrap ( ) ;
1007
+ let uncompleted_crates = ex. get_uncompleted_crates ( & db, & config) . unwrap ( ) ;
1078
1008
assert_eq ! ( uncompleted_crates. len( ) , 0 ) ;
1079
1009
}
1080
1010
@@ -1091,17 +1021,11 @@ mod tests {
1091
1021
1092
1022
// Create a dummy experiment
1093
1023
CreateExperiment :: dummy ( "dummy" ) . apply ( & ctx) . unwrap ( ) ;
1094
- let mut ex = Experiment :: next ( & db, & agent1) . unwrap ( ) . unwrap ( ) . 1 ;
1095
- assert ! ( !ex
1096
- . get_uncompleted_crates( & db, & config, & agent1)
1097
- . unwrap( )
1098
- . is_empty( ) ) ;
1099
- ex. clear_agent_progress ( & db, "agent-1" ) . unwrap ( ) ;
1024
+ let ex = Experiment :: next ( & db, & agent1) . unwrap ( ) . unwrap ( ) . 1 ;
1025
+ assert ! ( !ex. get_uncompleted_crates( & db, & config) . unwrap( ) . is_empty( ) ) ;
1100
1026
assert ! ( Experiment :: next( & db, & agent1) . unwrap( ) . is_some( ) ) ;
1027
+ std:: thread:: sleep ( std:: time:: Duration :: from_secs ( 80 ) ) ; // need to wait for at least 60 seconds for timeout to fire
1101
1028
assert_eq ! ( ex. status, Status :: Running ) ;
1102
- assert ! ( !ex
1103
- . get_uncompleted_crates( & db, & config, & agent1)
1104
- . unwrap( )
1105
- . is_empty( ) ) ;
1029
+ assert ! ( !ex. get_uncompleted_crates( & db, & config) . unwrap( ) . is_empty( ) ) ;
1106
1030
}
1107
1031
}
0 commit comments