Skip to content

Commit 9f654a1

Browse files
authored
Merge pull request #5646 from Vincent-lau/private/shul2/corosync3-msg
CP-49634: Add alerting for Corosync upgrade
2 parents 2aa27d7 + 6577325 commit 9f654a1

File tree

2 files changed

+61
-3
lines changed

2 files changed

+61
-3
lines changed

ocaml/xapi-consts/api_messages.ml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,8 @@ let cluster_host_leaving = addMessage "CLUSTER_HOST_LEAVING" 3L
311311

312312
let cluster_host_joining = addMessage "CLUSTER_HOST_JOINING" 4L
313313

314+
(* Alert declared through [addMessage] under the name CLUSTER_STACK_OUT_OF_DATE
   with priority 3L. It is destructured as a (name, priority) pair by
   [watch_cluster_stack_version] in xapi_clustering.ml, which raises it when
   the cluster stack is detected to be Corosync 2. *)
let cluster_stack_out_of_date = addMessage "CLUSTER_STACK_OUT_OF_DATE" 3L
315+
314316
(* Certificate expiration messages *)
315317
let host_server_certificate_expiring = "HOST_SERVER_CERTIFICATE_EXPIRING"
316318

ocaml/xapi/xapi_clustering.ml

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,8 @@ module Watcher = struct
540540
is an update *)
541541
let cluster_change_interval = Mtime.Span.min
542542

543+
(* Flag, initially [false], that [create_as_necessary] claims with
   [Atomic.compare_and_set] before starting the cluster stack version watcher
   thread, so only the caller that flips it to [true] creates that thread. *)
let cluster_stack_watcher : bool Atomic.t = Atomic.make false
544+
543545
(* we handle unclean hosts join and leave in the watcher, i.e. hosts joining and leaving
544546
due to network problems, power cut, etc. Join and leave initiated by the
545547
API will be handled in the API call themselves, but they share the same code
@@ -573,22 +575,76 @@ module Watcher = struct
573575
done ;
574576
Atomic.set cluster_change_watcher false
575577

578+
(** [watch_cluster_stack_version ~__context ~host] runs one check (there is no
    loop) of the cluster stack that [host] belongs to and raises an alert when
    it is Corosync 2.

    Nothing happens unless [!Daemon.enabled] is true. When [find_cluster_host]
    returns a cluster host, the record of its cluster is read and
    [Cluster_stack.of_version] is applied to the stack name and version; if the
    result equals [Cluster_stack.Corosync2], a message is created against
    [host] using the name and priority from
    [Api_messages.cluster_stack_out_of_date]. When there is no cluster host
    only a debug line is logged.

    NOTE(review): nothing in this function sets [cluster_stack_watcher] back to
    [false]; confirm that a single check per flag claim is intended. *)
let watch_cluster_stack_version ~__context ~host =
579+
(* There is no [else] branch: the function is a no-op when the daemon flag is
   not set. *)
if !Daemon.enabled then
580+
match find_cluster_host ~__context ~host with
581+
| Some ch ->
582+
let cluster_ref = Db.Cluster_host.get_cluster ~__context ~self:ch in
583+
let cluster_rec =
584+
Db.Cluster.get_record ~__context ~self:cluster_ref
585+
in
586+
(* Only the Corosync2 result triggers the alert; any other result falls
   through silently because this [if] has no [else]. *)
if
587+
Cluster_stack.of_version
588+
( cluster_rec.API.cluster_cluster_stack
589+
, cluster_rec.API.cluster_cluster_stack_version
590+
)
591+
= Cluster_stack.Corosync2
592+
then (
593+
debug "%s: Detected Corosync 2 running as cluster stack"
594+
__FUNCTION__ ;
595+
let body =
596+
"The current cluster stack version of Corosync 2 is out of date, \
597+
consider updating to Corosync 3"
598+
in
599+
let name, priority = Api_messages.cluster_stack_out_of_date in
600+
let host_uuid = Db.Host.get_uuid ~__context ~self:host in
601+
602+
(* The message is attached to the host through its UUID; the message
   reference returned by the call is deliberately discarded. *)
Helpers.call_api_functions ~__context (fun rpc session_id ->
603+
let _ : [> `message] Ref.t =
604+
Client.Client.Message.create ~rpc ~session_id ~name ~priority
605+
~cls:`Host ~obj_uuid:host_uuid ~body
606+
in
607+
()
608+
)
609+
)
610+
| None ->
611+
debug "%s: No cluster host, no need to watch" __FUNCTION__
612+
576613
(** [create_as_necessary] will create cluster watchers on the coordinator if they are not
577614
already created.
578615
There is no need to destroy them: once the clustering daemon is disabled,
579616
these threads will exit as well. *)
580617
let create_as_necessary ~__context ~host =
(* NOTE(review): this is a rendered diff hunk. Lines that follow a gutter marker
   ending in - appear to be the removed pre-change text, and lines that follow
   a marker ending in + the added text; read the + lines for the new
   behaviour. *)
581-
if Helpers.is_pool_master ~__context ~host then
618+
(* Both watchers below are only considered when [host] is the pool master. *)
if Helpers.is_pool_master ~__context ~host then (
582619
(* Cluster change watcher: requires cluster health to be enabled, and the
   thread is only created by the caller whose compare-and-set flips
   [cluster_change_watcher] from [false] to [true]. *)
if Xapi_cluster_helpers.cluster_health_enabled ~__context then
583620
if Atomic.compare_and_set cluster_change_watcher false true then (
584621
debug "%s: create watcher for corosync-notifyd on coordinator"
585622
__FUNCTION__ ;
586-
ignore
587-
@@ Thread.create (fun () -> watch_cluster_change ~__context ~host) ()
623+
(* The thread handle is bound with an explicit [Thread.t] annotation and
   then dropped; the thread is never joined here. *)
let _ : Thread.t =
624+
Thread.create (fun () -> watch_cluster_change ~__context ~host) ()
625+
in
626+
()
588627
) else
589628
(* someone else must have gone into the if branch above and created the thread
590629
before us, leave it to them *)
591630
debug
592631
"%s: not create watcher for corosync-notifyd as it already exists"
632+
__FUNCTION__ ;
633+
634+
(* Cluster stack watcher: the [;] above ends the cluster health conditional,
   so this check runs whether or not cluster health is enabled. It is gated on
   [corosync3_enabled] and on its own flag [cluster_stack_watcher]. *)
if Xapi_cluster_helpers.corosync3_enabled ~__context then
635+
if Atomic.compare_and_set cluster_stack_watcher false true then (
636+
debug
637+
"%s: create cluster stack watcher for out-of-date cluster stack \
638+
(corosync2)"
639+
__FUNCTION__ ;
640+
let _ : Thread.t =
641+
Thread.create
642+
(fun () -> watch_cluster_stack_version ~__context ~host)
643+
()
644+
in
645+
()
646+
) else
647+
debug "%s: not create watcher for cluster stack as it already exists"
593648
__FUNCTION__
649+
)
594650
end

0 commit comments

Comments
 (0)