@@ -427,11 +427,16 @@ let compute_corosync_max_host_failures ~__context =
427
427
corosync_ha_max_hosts
428
428
429
429
module Watcher = struct
430
+ let routine_updates = " routine updates" (* Sentinel value: the watcher loop passes it as the sole element of [updates] after an UPDATES.get timeout, and [on_corosync_update] compares against it to log "Perform routine updates" instead of the update count. *)
431
+
430
432
let on_corosync_update ~__context ~cluster updates =
431
- debug
432
- " %s: Received %d updates from corosync_notifyd, run diagnostics to get \
433
- new state"
434
- __FUNCTION__ (List. length updates) ;
433
+ if updates = [routine_updates] then
434
+ debug " %s: Perform routine updates" __FUNCTION__
435
+ else
436
+ debug
437
+ " %s: Received %d updates from corosync_notifyd, run diagnostics to get \
438
+ new state"
439
+ __FUNCTION__ (List. length updates) ;
435
440
let m =
436
441
Cluster_client.LocalClient. diagnostics (rpc ~__context)
437
442
" update quorum api fields with diagnostics"
@@ -535,10 +540,10 @@ module Watcher = struct
535
540
536
541
let cluster_change_watcher : bool Atomic.t = Atomic. make false
537
542
538
- (* this is the time it takes for the update request to time out. It is ok to set
543
+ (* This is the time it takes for the update request to time out. It is ok to set
539
544
it to a relatively long value since the call will return immediately if there
540
- is an update *)
541
- let cluster_change_interval = Mtime.Span. min
545
+ is an update. *)
546
+ let cluster_change_interval = Mtime.Span. ( 5 * min)
542
547
543
548
let cluster_stack_watcher : bool Atomic.t = Atomic. make false
544
549
@@ -550,21 +555,27 @@ module Watcher = struct
550
555
while ! Daemon. enabled do
551
556
let m =
552
557
Cluster_client.LocalClient.UPDATES. get (rpc ~__context)
553
- " call cluster watcher"
558
+ " cluster change watcher call "
554
559
(Clock.Timer. span_to_s cluster_change_interval)
555
560
in
556
- match Idl.IdM. run @@ Cluster_client.IDL.T. get m with
557
- | Ok updates -> (
561
+ let find_cluster_and_update updates = (* Looks up the cluster host for [host]; if one is found, fetches its cluster and hands [updates] to [on_corosync_update]. *)
558
562
match find_cluster_host ~__context ~host with
559
563
| Some ch ->
560
564
let cluster = Db.Cluster_host. get_cluster ~__context ~self: ch in
561
565
on_corosync_update ~__context ~cluster updates
562
566
| None ->
563
567
() (* [find_cluster_host] returned [None]: nothing to update, silently skip *)
564
- )
568
+ in
569
+ match Idl.IdM. run @@ Cluster_client.IDL.T. get m with
570
+ | Ok updates ->
571
+ (* Received updates from corosync-notifyd *)
572
+ find_cluster_and_update updates
565
573
| Error (InternalError "UPDATES.Timeout" ) ->
566
- (* UPDATES.get timed out, this is normal, now retry *)
567
- ()
574
+ (* UPDATES.get timed out, this is normal. *)
575
+ (* CA-395789: We send a query to xapi-clusterd to fetch the latest state
576
+ anyway in case there is a race and the previous update did not give the
577
+ most up-to-date information *)
578
+ find_cluster_and_update [routine_updates]
568
579
| Error (InternalError message ) | Error (Unix_error message ) ->
569
580
warn " %s: Cannot query cluster host updates with error %s"
570
581
__FUNCTION__ message
0 commit comments