From 77e3a3e820c19ee5722d0c09f9972f9f762c296f Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Mon, 21 Oct 2024 19:08:14 +0100 Subject: [PATCH 1/3] CA-400060: Introduce new field for sm class `host_pending_features` represents the features that are available on some of the hosts in the pool, but not all of them. Note the way this field is initialised in the `SM.create` code means that it will only contain new features that appear during upgrade. This means a feature that is added into `SM.features` during creation will stay there even if it is not available on all hosts. But we should not end up in this situation in the first place. Also change the meaning of `Sm.features` to be pool-wide. Signed-off-by: Vincent Liu --- ocaml/idl/datamodel.ml | 10 ++++++++++ ocaml/idl/datamodel_common.ml | 2 +- ocaml/idl/datamodel_lifecycle.ml | 2 ++ ocaml/idl/schematest.ml | 2 +- ocaml/sdk-gen/csharp/gen_csharp_binding.ml | 4 ++++ ocaml/tests/common/test_common.ml | 9 +++++---- 6 files changed, 23 insertions(+), 6 deletions(-) diff --git a/ocaml/idl/datamodel.ml b/ocaml/idl/datamodel.ml index e21369be258..83d5d1740c3 100644 --- a/ocaml/idl/datamodel.ml +++ b/ocaml/idl/datamodel.ml @@ -5001,11 +5001,21 @@ module SM = struct , "capabilities of the SM plugin, with capability version \ numbers" ) + ; ( Changed + , "24.37.0" + , "features are now pool-wide, instead of what is available on \ + the coordinator sm" + ) ] ~ty:(Map (String, Int)) "features" "capabilities of the SM plugin, with capability version numbers" ~default_value:(Some (VMap [])) + ; field ~in_oss_since:None ~qualifier:DynamicRO ~lifecycle:[] + ~ty:(Map (Ref _host, Set String)) + ~internal_only:true "host_pending_features" + "SM features that are waiting to be declared per host." + ~default_value:(Some (VMap [])) ; field ~lifecycle:[(Published, rel_miami, "additional configuration")] ~default_value:(Some (VMap [])) diff --git a/ocaml/idl/datamodel_common.ml b/ocaml/idl/datamodel_common.ml index a5fb8bd381a..80c5076fef7 100644 --- a/ocaml/idl/datamodel_common.ml +++ b/ocaml/idl/datamodel_common.ml @@ -10,7 +10,7 @@ open Datamodel_roles to leave a gap for potential hotfixes needing to increment the schema version.*) let schema_major_vsn = 5 -let schema_minor_vsn = 784 +let schema_minor_vsn = 785 (* Historical schema versions just in case this is useful later *) let rio_schema_major_vsn = 5 diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index 60e46afb038..fb728685a55 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -51,6 +51,8 @@ let prototyped_of_field = function Some "22.26.0" | "VTPM", "persistence_backend" -> Some "22.26.0" + | "SM", "host_pending_features" -> + Some "24.36.0-next" | "host", "last_update_hash" -> Some "24.10.0" | "host", "pending_guidances_full" -> diff --git a/ocaml/idl/schematest.ml b/ocaml/idl/schematest.ml index 595289dfd24..2c4a87453ba 100644 --- a/ocaml/idl/schematest.ml +++ b/ocaml/idl/schematest.ml @@ -3,7 +3,7 @@ let hash x = Digest.string x |> Digest.to_hex (* BEWARE: if this changes, check that schema has been bumped accordingly in ocaml/idl/datamodel_common.ml, usually schema_minor_vsn *) -let last_known_schema_hash = "b427bac09aca4eabc9407738a9155326" +let last_known_schema_hash = "18df8c33434e3df1982e11ec55d1f3f8" let current_schema_hash : string = let open Datamodel_types in diff --git a/ocaml/sdk-gen/csharp/gen_csharp_binding.ml b/ocaml/sdk-gen/csharp/gen_csharp_binding.ml index bbf3360c897..c9112b680e3 100644 --- a/ocaml/sdk-gen/csharp/gen_csharp_binding.ml +++ b/ocaml/sdk-gen/csharp/gen_csharp_binding.ml @@ -1184,6 +1184,10 @@ and json_serialization_attr fr = (exposed_class_name v) | Map (String, String) -> sprintf "\n [JsonConverter(typeof(StringStringMapConverter))]" + | Map (Ref u, Set String) -> + sprintf + "\n [JsonConverer(typeof(XenRefStringSetMapConverter<%s>))]" + (exposed_class_name u) | Map (Ref _, _) | Map (_, Ref _) -> failwith (sprintf "Need converter for %s" fr.field_name) | _ -> diff --git a/ocaml/tests/common/test_common.ml b/ocaml/tests/common/test_common.ml index 7908eb4e3ff..297a68398ca 100644 --- a/ocaml/tests/common/test_common.ml +++ b/ocaml/tests/common/test_common.ml @@ -342,12 +342,13 @@ let default_sm_features = let make_sm ~__context ?(ref = Ref.make ()) ?(uuid = make_uuid ()) ?(_type = "sm") ?(name_label = "") ?(name_description = "") ?(vendor = "") ?(copyright = "") ?(version = "") ?(required_api_version = "") - ?(capabilities = []) ?(features = default_sm_features) ?(configuration = []) - ?(other_config = []) ?(driver_filename = "/dev/null") - ?(required_cluster_stack = []) () = + ?(capabilities = []) ?(features = default_sm_features) + ?(host_pending_features = []) ?(configuration = []) ?(other_config = []) + ?(driver_filename = "/dev/null") ?(required_cluster_stack = []) () = Db.SM.create ~__context ~ref ~uuid ~_type ~name_label ~name_description ~vendor ~copyright ~version ~required_api_version ~capabilities ~features - ~configuration ~other_config ~driver_filename ~required_cluster_stack ; + ~host_pending_features ~configuration ~other_config ~driver_filename + ~required_cluster_stack ; ref let make_sr ~__context ?(ref = Ref.make ()) ?(uuid = make_uuid ()) From 7f14bfcc2ee6ed583330263ae47dc87a7ba665ea Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Mon, 21 Oct 2024 19:10:44 +0100 Subject: [PATCH 2/3] CA-400060: Sm feature intersection NEW Sm features that are found during an upgrde will now only be available when they are available on all of the hosts. Add logic to keep track of features that are only availabe on some of the hosts in the pool, and declare them in `Sm.feature` only when all of the hosts have declared this. Also move `Storage_access.on_xapi_start` to `dbsync_slave` as this needs to be run on all hosts for each sm to get a chance to say what features they have. Signed-off-by: Vincent Liu --- ocaml/tests/test_sm_features.ml | 42 +++++++++++++++++++ ocaml/xapi/dbsync_master.ml | 1 - ocaml/xapi/dbsync_slave.ml | 3 ++ ocaml/xapi/smint.ml | 17 ++++++++ ocaml/xapi/xapi_globs.ml | 2 + ocaml/xapi/xapi_sm.ml | 71 +++++++++++++++++++++++++++++++-- 6 files changed, 131 insertions(+), 5 deletions(-) diff --git a/ocaml/tests/test_sm_features.ml b/ocaml/tests/test_sm_features.ml index a78de4a54a7..091d58d4f6e 100644 --- a/ocaml/tests/test_sm_features.ml +++ b/ocaml/tests/test_sm_features.ml @@ -160,6 +160,21 @@ let test_sequences = } ] +let test_intersection_sequences = + ( { + raw= ["VDI_MIRROR"] + ; smapiv1_features= [(Vdi_mirror, 1L)] + ; smapiv2_features= ["VDI_MIRROR/1"] + ; sm= {capabilities= ["VDI_MIRROR"]; features= [("VDI_MIRROR", 1L)]} + } + , { + raw= ["VDI_MIRROR"] + ; smapiv1_features= [(Vdi_mirror, 2L)] + ; smapiv2_features= ["VDI_MIRROR/2"] + ; sm= {capabilities= ["VDI_MIRROR"]; features= [("VDI_MIRROR", 1L)]} + } + ) + module ParseSMAPIv1Features = Generic.MakeStateless (struct module Io = struct type input_t = string list @@ -249,6 +264,32 @@ module CreateSMObject = Generic.MakeStateful (struct ) end) +module CompatSMFeatures = Generic.MakeStateless (struct + module Io = struct + type input_t = (string * string) list + + type output_t = string list + + let string_of_input_t = Test_printers.(list (fun (x, y) -> x ^ "," ^ y)) + + let string_of_output_t = Test_printers.(list Fun.id) + end + + let transform l = + List.split l |> fun (x, y) -> + (Smint.parse_string_int64_features x, Smint.parse_string_int64_features y) + |> fun (x, y) -> Smint.compat_features x y |> List.map Smint.unparse_feature + + let tests = + let r1, r2 = test_intersection_sequences in + `QuickAndAutoDocumented + [ + ( List.combine r1.smapiv2_features r2.smapiv2_features + , r1.smapiv2_features + ) + ] +end) + let tests = List.map (fun (s, t) -> (Format.sprintf "sm_features_%s" s, t)) @@ -256,4 +297,5 @@ let tests = ("parse_smapiv1_features", ParseSMAPIv1Features.tests) ; ("create_smapiv2_features", CreateSMAPIv2Features.tests) ; ("create_sm_object", CreateSMObject.tests) + ; ("compat_sm_features", CompatSMFeatures.tests) ] diff --git a/ocaml/xapi/dbsync_master.ml b/ocaml/xapi/dbsync_master.ml index aad7434dc02..cac05f37e88 100644 --- a/ocaml/xapi/dbsync_master.ml +++ b/ocaml/xapi/dbsync_master.ml @@ -373,7 +373,6 @@ let update_env __context = in the db for cancelling *) Cancel_tasks.cancel_tasks_on_host ~__context ~host_opt:None ; (* Update the SM plugin table *) - Storage_access.on_xapi_start ~__context ; if !Xapi_globs.create_tools_sr then create_tools_sr_noexn __context ; ensure_vm_metrics_records_exist_noexn __context ; diff --git a/ocaml/xapi/dbsync_slave.ml b/ocaml/xapi/dbsync_slave.ml index 3b90a3a05c3..942d3081071 100644 --- a/ocaml/xapi/dbsync_slave.ml +++ b/ocaml/xapi/dbsync_slave.ml @@ -362,6 +362,9 @@ let update_env __context sync_keys = switched_sync Xapi_globs.sync_refresh_localhost_info (fun () -> refresh_localhost_info ~__context info ) ; + switched_sync Xapi_globs.sync_sm_records (fun () -> + Storage_access.on_xapi_start ~__context + ) ; switched_sync Xapi_globs.sync_local_vdi_activations (fun () -> Storage_access.refresh_local_vdi_activations ~__context ) ; diff --git a/ocaml/xapi/smint.ml b/ocaml/xapi/smint.ml index 25019a18294..8797e0d7cf6 100644 --- a/ocaml/xapi/smint.ml +++ b/ocaml/xapi/smint.ml @@ -110,6 +110,8 @@ let capability_of_feature : feature -> capability = fst let known_features = List.map fst string_to_capability_table +let unparse_feature (f, v) = f ^ "/" ^ Int64.to_string v + let parse_string_int64_features features = let scan feature = match String.split_on_char '/' feature with @@ -134,6 +136,21 @@ let parse_string_int64_features features = |> List.filter_map scan |> List.sort_uniq (fun (x, _) (y, _) -> compare x y) +(** [compat_features features1 features2] finds the compatible features in the input +features lists. We assume features backwards compatible, i.e. if there are FOO/1 and + FOO/2 are present, then we assume they can both do FOO/1*) +let compat_features features1 features2 = + let features2 = List.to_seq features2 |> Hashtbl.of_seq in + List.filter_map + (fun (f1, v1) -> + match Hashtbl.find_opt features2 f1 with + | Some v2 -> + Some (f1, Int64.min v1 v2) + | None -> + None + ) + features1 + let parse_capability_int64_features strings = List.map (function c, v -> (List.assoc c string_to_capability_table, v)) diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 9a461a4e7bb..efdcabfbdb6 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -340,6 +340,8 @@ let sync_switch_off = "nosync" (* dbsync_slave *) let sync_local_vdi_activations = "sync_local_vdi_activations" +let sync_sm_records = "sync_sm_records" + let sync_create_localhost = "sync_create_localhost" let sync_set_cache_sr = "sync_set_cache_sr" diff --git a/ocaml/xapi/xapi_sm.ml b/ocaml/xapi/xapi_sm.ml index ba3d7c8242a..9badc179c06 100644 --- a/ocaml/xapi/xapi_sm.ml +++ b/ocaml/xapi/xapi_sm.ml @@ -18,6 +18,8 @@ (* The SMAPIv1 plugins are a static set in the filesystem. The SMAPIv2 plugins are a dynamic set hosted in driver domains. *) +module Listext = Xapi_stdext_std.Listext + let finally = Xapi_stdext_pervasives.Pervasiveext.finally (* We treat versions as '.'-separated integer lists under the usual @@ -36,7 +38,7 @@ let create_from_query_result ~__context q = if String.lowercase_ascii q.driver <> "storage_access" then ( let features = Smint.parse_string_int64_features q.features in let capabilities = List.map fst features in - info "Registering SM plugin %s (version %s)" + info "%s Registering SM plugin %s (version %s)" __FUNCTION__ (String.lowercase_ascii q.driver) q.version ; Db.SM.create ~__context ~ref:r ~uuid:u @@ -44,19 +46,80 @@ let create_from_query_result ~__context q = ~name_label:q.name ~name_description:q.description ~vendor:q.vendor ~copyright:q.copyright ~version:q.version ~required_api_version:q.required_api_version ~capabilities ~features - ~configuration:q.configuration ~other_config:[] + ~host_pending_features:[] ~configuration:q.configuration ~other_config:[] ~driver_filename:(Sm_exec.cmd_name q.driver) ~required_cluster_stack:q.required_cluster_stack ) +let find_pending_features existing_features features = + Listext.List.set_difference features existing_features + +(** [addto_pending_hosts_features ~__context self new_features] will add [new_features] +to pending features of host [self]. It then returns a list of currently pending features *) +let addto_pending_hosts_features ~__context self new_features = + let host = Helpers.get_localhost ~__context in + let new_features = + List.map (fun (f, v) -> Smint.unparse_feature (f, v)) new_features + in + let curr_pending_features = + Db.SM.get_host_pending_features ~__context ~self + |> List.remove_assoc host + |> List.cons (host, new_features) + in + Db.SM.set_host_pending_features ~__context ~self ~value:curr_pending_features ; + List.iter + (fun (h, f) -> + debug "%s: current pending features for host %s, sm %s, features %s" + __FUNCTION__ (Ref.string_of h) (Ref.string_of self) (String.concat "," f) + ) + curr_pending_features ; + List.map + (fun (h, f) -> (h, Smint.parse_string_int64_features f)) + curr_pending_features + +let valid_hosts_pending_features ~__context pending_features = + if List.length pending_features <> List.length (Db.Host.get_all ~__context) + then ( + debug "%s: Not enough hosts have registered their sm features" __FUNCTION__ ; + [] + ) else + List.map snd pending_features |> fun l -> + List.fold_left Smint.compat_features + (* The list in theory cannot be empty due to the if condition check, but do + this just in case *) + (List.nth_opt l 0 |> Option.fold ~none:[] ~some:Fun.id) + (List.tl l) + +let remove_valid_features_from_pending ~__context ~self valid_features = + let valid_features = List.map Smint.unparse_feature valid_features in + let new_pending_feature = + Db.SM.get_host_pending_features ~__context ~self + |> List.map (fun (h, pending_features) -> + (h, Listext.List.set_difference pending_features valid_features) + ) + in + Db.SM.set_host_pending_features ~__context ~self ~value:new_pending_feature + let update_from_query_result ~__context (self, r) q_result = let open Storage_interface in let _type = String.lowercase_ascii q_result.driver in if _type <> "storage_access" then ( let driver_filename = Sm_exec.cmd_name q_result.driver in - let features = Smint.parse_string_int64_features q_result.features in + let existing_features = Db.SM.get_features ~__context ~self in + let new_features = + Smint.parse_string_int64_features q_result.features + |> find_pending_features existing_features + |> addto_pending_hosts_features ~__context self + |> valid_hosts_pending_features ~__context + in + remove_valid_features_from_pending ~__context ~self new_features ; + let features = existing_features @ new_features in + List.iter + (fun (f, v) -> debug "%s: declaring new features %s:%Ld" __FUNCTION__ f v) + new_features ; + let capabilities = List.map fst features in - info "Registering SM plugin %s (version %s)" + info "%s Registering SM plugin %s (version %s)" __FUNCTION__ (String.lowercase_ascii q_result.driver) q_result.version ; if r.API.sM_type <> _type then From 2ec0ac68689ad24c38cf789bd094f4db058bda19 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Mon, 21 Oct 2024 19:14:43 +0100 Subject: [PATCH 3/3] CA-400060: Reject pool join if sm features mismatch Implement a new `assert_sm_features_compatiable` in pre_join_checks so that if the joining host does not have compatible sm features, it will be denied entry into the pool. Signed-off-by: Vincent Liu --- ocaml/idl/datamodel_errors.ml | 7 +++++ ocaml/xapi-consts/api_errors.ml | 3 ++ ocaml/xapi/xapi_pool.ml | 49 ++++++++++++++++++++++++++++++++- 3 files changed, 58 insertions(+), 1 deletion(-) diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml index aead3e0abc4..80b36218f25 100644 --- a/ocaml/idl/datamodel_errors.ml +++ b/ocaml/idl/datamodel_errors.ml @@ -890,6 +890,13 @@ let _ = "The host joining the pool has different CA certificates from the pool \ coordinator while using the same name, uninstall them and try again." () ; + error Api_errors.pool_joining_sm_features_incompatible + ["pool_sm_ref"; "candidate_sm_ref"] + ~doc: + "The host joining the pool has an incompatible set of sm features from \ + the pool coordinator. Make sure the sm are of the same versions and try \ + again." + () ; (* External directory service *) error Api_errors.subject_cannot_be_resolved [] diff --git a/ocaml/xapi-consts/api_errors.ml b/ocaml/xapi-consts/api_errors.ml index ebafbdaa111..53e9e06176b 100644 --- a/ocaml/xapi-consts/api_errors.ml +++ b/ocaml/xapi-consts/api_errors.ml @@ -754,6 +754,9 @@ let pool_joining_host_tls_verification_mismatch = let pool_joining_host_ca_certificates_conflict = add_error "POOL_JOINING_HOST_CA_CERTIFICATES_CONFLICT" +let pool_joining_sm_features_incompatible = + add_error "POOL_JOINING_SM_FEATURES_INCOMPATIBLE" + (*workload balancing*) let wlb_not_initialized = add_error "WLB_NOT_INITIALIZED" diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index acb22cdcfcd..eb716ce766e 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -839,6 +839,52 @@ let pre_join_checks ~__context ~rpc ~session_id ~force = ) ) in + let assert_sm_features_compatible () = + (* We consider the case where the existing pool has FOO/m, and the candidate having FOO/n, + where n >= m, to be compatible. Not vice versa. *) + let features_compatible coor_features candidate_features = + (* The pool features must not be reduced or downgraded, although it is fine + the other way around. *) + Smint.compat_features coor_features candidate_features = coor_features + in + + let master_sms = Client.SM.get_all ~rpc ~session_id in + List.iter + (fun sm -> + let master_sm_type = Client.SM.get_type ~rpc ~session_id ~self:sm in + let candidate_sm_ref, candidate_sm_rec = + match + Db.SM.get_records_where ~__context + ~expr:(Eq (Field "type", Literal master_sm_type)) + with + | [(sm_ref, sm_rec)] -> + (sm_ref, sm_rec) + | _ -> + raise + Api_errors.( + Server_error + ( pool_joining_sm_features_incompatible + , [Ref.string_of sm; ""] + ) + ) + in + + let coor_sm_features = + Client.SM.get_features ~rpc ~session_id ~self:sm + in + let candidate_sm_features = candidate_sm_rec.API.sM_features in + if not (features_compatible coor_sm_features candidate_sm_features) then + raise + Api_errors.( + Server_error + ( pool_joining_sm_features_incompatible + , [Ref.string_of sm; Ref.string_of candidate_sm_ref] + ) + ) + ) + master_sms + in + (* call pre-join asserts *) assert_pool_size_unrestricted () ; assert_management_interface_exists () ; @@ -872,7 +918,8 @@ let pre_join_checks ~__context ~rpc ~session_id ~force = assert_tls_verification_matches () ; assert_ca_certificates_compatible () ; assert_not_in_updating_on_me () ; - assert_no_hosts_in_updating () + assert_no_hosts_in_updating () ; + assert_sm_features_compatible () let rec create_or_get_host_on_master __context rpc session_id (host_ref, host) : API.ref_host =