From 750518bfc845d364019f886140c7c9e30e41d5b1 Mon Sep 17 00:00:00 2001 From: Gang Ji <62988402+gangj@users.noreply.github.com> Date: Wed, 23 Oct 2024 10:18:36 +0800 Subject: [PATCH 001/117] CP-51393: Datamodel: update Repository for syncing from a remote pool (#6049) - add a new type of origin: "remote_pool" - add a new API: "introduce_remote_pool" to init a remote_pool repository - add a new field: "certificate" for a remote_pool repository - for a remote_pool repository, binary_url will be reused to hold the base URL of binary packages in the local repository of the remote pool in https:///repository format Signed-off-by: Gang Ji --- ocaml/idl/datamodel_repository.ml | 31 +++++++++++++++++++++++-- ocaml/idl/schematest.ml | 2 +- ocaml/xapi-cli-server/cli_frontend.ml | 9 +++++++ ocaml/xapi-cli-server/cli_operations.ml | 15 ++++++++++++ ocaml/xapi-cli-server/records.ml | 3 +++ ocaml/xapi/message_forwarding.ml | 9 +++++++ ocaml/xapi/repository.ml | 25 +++++++++++++++++++- ocaml/xapi/repository.mli | 8 +++++++ ocaml/xapi/repository_helpers.ml | 6 +++-- ocaml/xapi/xapi_globs.ml | 2 ++ ocaml/xapi/xapi_pool.ml | 2 +- 11 files changed, 105 insertions(+), 7 deletions(-) diff --git a/ocaml/idl/datamodel_repository.ml b/ocaml/idl/datamodel_repository.ml index 2142084c984..e2a8f03b0c5 100644 --- a/ocaml/idl/datamodel_repository.ml +++ b/ocaml/idl/datamodel_repository.ml @@ -24,6 +24,7 @@ let origin = , [ ("remote", "The origin of the repository is a remote one") ; ("bundle", "The origin of the repository is a local bundle file") + ; ("remote_pool", "The origin of the repository is a remote pool") ] ) @@ -94,6 +95,27 @@ let introduce_bundle = ~allowed_roles:(_R_POOL_OP ++ _R_CLIENT_CERT) () +let introduce_remote_pool = + call ~name:"introduce_remote_pool" ~in_oss_since:None ~lifecycle:[] + ~doc:"Add the configuration for a new remote pool repository" + ~params: + [ + (String, "name_label", "The name of the repository") + ; (String, "name_description", "The description of the 
repository") + ; ( String + , "binary_url" + , "Base URL of binary packages in the local repository of this remote \ + pool in https:///repository format" + ) + ; ( String + , "certificate" + , "The host certificate of the coordinator of the remote pool" + ) + ] + ~result:(Ref _repository, "The ref of the created repository record.") + ~allowed_roles:(_R_POOL_OP ++ _R_CLIENT_CERT) + () + let forget = call ~name:"forget" ~in_oss_since:None ~lifecycle:[(Published, "1.301.0", "")] @@ -173,6 +195,7 @@ let t = [ introduce ; introduce_bundle + ; introduce_remote_pool ; forget ; apply ; set_gpgkey_path @@ -223,8 +246,12 @@ let t = "The file name of the GPG public key of this repository" ; field ~qualifier:StaticRO ~lifecycle:[] ~ty:origin "origin" ~default_value:(Some (VEnum "remote")) - "The origin of the repository. 'remote' if the origin of the \ + "The origin of this repository. 'remote' if the origin of the \ repository is a remote one, 'bundle' if the origin of the \ - repository is a local bundle file." 
+ repository is a local bundle file, 'remote_pool' if the origin of \ + the repository is a remote pool" + ; field ~qualifier:StaticRO ~lifecycle:[] ~ty:String + ~default_value:(Some (VString "")) "certificate" + "The certificate of the host which hosts this repository" ] () diff --git a/ocaml/idl/schematest.ml b/ocaml/idl/schematest.ml index 016a90960f3..d83bf34775a 100644 --- a/ocaml/idl/schematest.ml +++ b/ocaml/idl/schematest.ml @@ -3,7 +3,7 @@ let hash x = Digest.string x |> Digest.to_hex (* BEWARE: if this changes, check that schema has been bumped accordingly in ocaml/idl/datamodel_common.ml, usually schema_minor_vsn *) -let last_known_schema_hash = "8fcd8892ec0c7d130b0da44c5fd3990b" +let last_known_schema_hash = "aba698bd66b04e0145f07130e6db9cad" let current_schema_hash : string = let open Datamodel_types in diff --git a/ocaml/xapi-cli-server/cli_frontend.ml b/ocaml/xapi-cli-server/cli_frontend.ml index 881d016267a..70a3374f0e9 100644 --- a/ocaml/xapi-cli-server/cli_frontend.ml +++ b/ocaml/xapi-cli-server/cli_frontend.ml @@ -3684,6 +3684,15 @@ let rec cmdtable_data : (string * cmd_spec) list = ; flags= [] } ) + ; ( "repository-introduce-remote-pool" + , { + reqd= ["name-label"; "binary-url"; "certificate-file"] + ; optn= ["name-description"] + ; help= "Add the configuration for a new remote pool repository." 
+ ; implementation= With_fd Cli_operations.Repository.introduce_remote_pool + ; flags= [] + } + ) ; ( "repository-forget" , { reqd= ["uuid"] diff --git a/ocaml/xapi-cli-server/cli_operations.ml b/ocaml/xapi-cli-server/cli_operations.ml index aa3bf08c05a..4b39545da4b 100644 --- a/ocaml/xapi-cli-server/cli_operations.ml +++ b/ocaml/xapi-cli-server/cli_operations.ml @@ -7936,6 +7936,21 @@ module Repository = struct let uuid = Client.Repository.get_uuid ~rpc ~session_id ~self:ref in printer (Cli_printer.PList [uuid]) + let introduce_remote_pool fd printer rpc session_id params = + let name_label = List.assoc "name-label" params in + let name_description = get_param params "name-description" ~default:"" in + let binary_url = List.assoc "binary-url" params in + let certificate = + List.assoc "certificate-file" params + |> get_file_or_fail fd "certificate file" + in + let ref = + Client.Repository.introduce_remote_pool ~rpc ~session_id ~name_label + ~name_description ~binary_url ~certificate + in + let uuid = Client.Repository.get_uuid ~rpc ~session_id ~self:ref in + printer (Cli_printer.PList [uuid]) + let forget _printer rpc session_id params = let ref = Client.Repository.get_by_uuid ~rpc ~session_id diff --git a/ocaml/xapi-cli-server/records.ml b/ocaml/xapi-cli-server/records.ml index cd7e2f5ae80..f6996089b55 100644 --- a/ocaml/xapi-cli-server/records.ml +++ b/ocaml/xapi-cli-server/records.ml @@ -5287,6 +5287,9 @@ let repository_record rpc session_id repository = Record_util.origin_to_string (x ()).API.repository_origin ) () + ; make_field ~name:"certificate" ~hidden:true + ~get:(fun () -> (x ()).API.repository_certificate) + () ] } diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index c85dc2cb025..e4b58c71bdd 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -6608,6 +6608,15 @@ functor Local.Repository.introduce_bundle ~__context ~name_label ~name_description + let introduce_remote_pool ~__context 
~name_label ~name_description + ~binary_url ~certificate = + info + "Repository.introduce_remote_pool: name = '%s'; name_description = \ + '%s'; binary_url = '%s'; certificate = '%s'" + name_label name_description binary_url certificate ; + Local.Repository.introduce_remote_pool ~__context ~name_label + ~name_description ~binary_url ~certificate + let forget ~__context ~self = info "Repository.forget: self = '%s'" (repository_uuid ~__context self) ; Local.Repository.forget ~__context ~self diff --git a/ocaml/xapi/repository.ml b/ocaml/xapi/repository.ml index dd123557a49..b48601da4fd 100644 --- a/ocaml/xapi/repository.ml +++ b/ocaml/xapi/repository.ml @@ -46,7 +46,7 @@ let introduce ~__context ~name_label ~name_description ~binary_url ~source_url ) ) ; create_repository_record ~__context ~name_label ~name_description ~binary_url - ~source_url ~update ~gpgkey_path ~origin:`remote + ~source_url ~update ~gpgkey_path ~origin:`remote ~certificate:"" let introduce_bundle ~__context ~name_label ~name_description = Db.Repository.get_all ~__context @@ -65,6 +65,24 @@ let introduce_bundle ~__context ~name_label ~name_description = ) ; create_repository_record ~__context ~name_label ~name_description ~binary_url:"" ~source_url:"" ~update:true ~gpgkey_path:"" ~origin:`bundle + ~certificate:"" + +let introduce_remote_pool ~__context ~name_label ~name_description ~binary_url + ~certificate = + Db.Repository.get_all ~__context + |> List.iter (fun ref -> + if + name_label = Db.Repository.get_name_label ~__context ~self:ref + || binary_url = Db.Repository.get_binary_url ~__context ~self:ref + then + raise + Api_errors.( + Server_error (repository_already_exists, [Ref.string_of ref]) + ) + ) ; + create_repository_record ~__context ~name_label ~name_description ~binary_url + ~source_url:"" ~update:true ~gpgkey_path:"" ~origin:`remote_pool + ~certificate let forget ~__context ~self = let pool = Helpers.get_pool ~__context in @@ -143,6 +161,11 @@ let sync ~__context ~self ~token 
~token_id = Uri.make ~scheme:"file" ~path:!Xapi_globs.bundle_repository_dir () in (Uri.to_string uri, None) + | `remote_pool -> + (* TODO: sync with Stunnel.with_client_proxy as otherwise yum + reposync will fail when checking the self signed certificate on + the remote pool. *) + ("", None) in let gpgkey_path = match Db.Repository.get_gpgkey_path ~__context ~self with diff --git a/ocaml/xapi/repository.mli b/ocaml/xapi/repository.mli index e7bddad8bad..a454adfc187 100644 --- a/ocaml/xapi/repository.mli +++ b/ocaml/xapi/repository.mli @@ -28,6 +28,14 @@ val introduce_bundle : -> name_description:string -> [`Repository] API.Ref.t +val introduce_remote_pool : + __context:Context.t + -> name_label:string + -> name_description:string + -> binary_url:string + -> certificate:string + -> [`Repository] API.Ref.t + val forget : __context:Context.t -> self:[`Repository] API.Ref.t -> unit val cleanup_all_pool_repositories : unit -> unit diff --git a/ocaml/xapi/repository_helpers.ml b/ocaml/xapi/repository_helpers.ml index 51699612739..4d3cc5766f1 100644 --- a/ocaml/xapi/repository_helpers.ml +++ b/ocaml/xapi/repository_helpers.ml @@ -136,12 +136,12 @@ module GuidanceSet = struct end let create_repository_record ~__context ~name_label ~name_description - ~binary_url ~source_url ~update ~gpgkey_path ~origin = + ~binary_url ~source_url ~update ~gpgkey_path ~origin ~certificate = let ref = Ref.make () in let uuid = Uuidx.(to_string (make ())) in Db.Repository.create ~__context ~ref ~uuid ~name_label ~name_description ~binary_url ~source_url ~update ~hash:"" ~up_to_date:false ~gpgkey_path - ~origin ; + ~origin ~certificate ; ref module DomainNameIncludeIP = struct @@ -384,6 +384,8 @@ let get_remote_repository_name ~__context ~self = !Xapi_globs.remote_repository_prefix | `bundle -> !Xapi_globs.bundle_repository_prefix + | `remote_pool -> + !Xapi_globs.remote_pool_repository_prefix in prefix ^ "-" ^ get_repository_name ~__context ~self diff --git a/ocaml/xapi/xapi_globs.ml 
b/ocaml/xapi/xapi_globs.ml index c675e036451..d2c3ccf3693 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -933,6 +933,8 @@ let remote_repository_prefix = ref "remote" let bundle_repository_prefix = ref "bundle" +let remote_pool_repository_prefix = ref "remote-pool" + let local_repository_prefix = ref "local" let yum_config_manager_cmd = ref "/usr/bin/yum-config-manager" diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 13b1d698714..cb9b6b843ce 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -3898,7 +3898,7 @@ let put_bundle_handler (req : Request.t) s _ = ) ; Http_svr.headers s (Http.http_400_badrequest ()) ) - | `remote -> + | `remote | `remote_pool -> error "%s: Bundle repo is not enabled" __FUNCTION__ ; TaskHelper.failed ~__context Api_errors.(Server_error (bundle_repo_not_enabled, [])) ; From 654e7c1a44e1a406fd6f266c348aa31070470859 Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Wed, 23 Oct 2024 08:25:16 +0100 Subject: [PATCH 002/117] CP-51835: Keep the HTTP /repository handler enabled The HTTP /repository handler is guarded by a mutex `exposing_pool_repo_mutex` currently. Since now HTTP /repository is protected by `session_id` cookie, we can remove the mutex from this handler and keep the handler enabled all the time. Also, rename the mutex `exposing_pool_repo_mutex` to `pool_update_ops_mutex`. 
Signed-off-by: Bengang Yuan --- ocaml/xapi/repository.ml | 10 +--------- ocaml/xapi/repository_helpers.ml | 13 +++---------- 2 files changed, 4 insertions(+), 19 deletions(-) diff --git a/ocaml/xapi/repository.ml b/ocaml/xapi/repository.ml index dd123557a49..58331f73ba0 100644 --- a/ocaml/xapi/repository.ml +++ b/ocaml/xapi/repository.ml @@ -488,10 +488,6 @@ let get_host_updates_in_json ~__context ~installed = (ExnHelper.string_of_exn e) ; raise Api_errors.(Server_error (get_host_updates_failed, [ref])) -(* This handler hosts HTTP endpoint '/repository' which will be available iif - * 'is_local_pool_repo_enabled' returns true with 'with_pool_repositories' being called by - * others. - *) let get_repository_handler (req : Http.Request.t) s _ = let open Http in let open Xapi_stdext_std.Xstringext in @@ -499,7 +495,7 @@ let get_repository_handler (req : Http.Request.t) s _ = req.Request.close <- true ; if Fileserver.access_forbidden req s then Http_svr.response_forbidden ~req s - else if is_local_pool_repo_enabled () then + else let can_be_authorized = try Xapi_http.with_context "get_repository_handler" req s (fun _ -> ()) ; @@ -536,10 +532,6 @@ let get_repository_handler (req : Http.Request.t) s _ = (ExnHelper.string_of_exn e) ; Http_svr.response_forbidden ~req s ) - else ( - error "Rejecting request: local pool repository is not enabled" ; - Http_svr.response_forbidden ~req s - ) let consolidate_updates_of_hosts ~repository_name ~updates_info ~hosts = Hashtbl.fold diff --git a/ocaml/xapi/repository_helpers.ml b/ocaml/xapi/repository_helpers.ml index 51699612739..c27f9d02a41 100644 --- a/ocaml/xapi/repository_helpers.ml +++ b/ocaml/xapi/repository_helpers.ml @@ -22,7 +22,7 @@ open Updateinfo module LivePatchSet = Set.Make (LivePatch) module RpmFullNameSet = Set.Make (String) -let exposing_pool_repo_mutex = Mutex.create () +let pool_update_ops_mutex = Mutex.create () module Pkgs = (val Pkg_mgr.get_pkg_mgr) @@ -234,17 +234,10 @@ let assert_gpgkey_path_is_valid path = 
let with_pool_repositories f = Xapi_stdext_pervasives.Pervasiveext.finally (fun () -> - Mutex.lock exposing_pool_repo_mutex ; + Mutex.lock pool_update_ops_mutex ; f () ) - (fun () -> Mutex.unlock exposing_pool_repo_mutex) - -let is_local_pool_repo_enabled () = - if Mutex.try_lock exposing_pool_repo_mutex then ( - Mutex.unlock exposing_pool_repo_mutex ; - false - ) else - true + (fun () -> Mutex.unlock pool_update_ops_mutex) let with_updateinfo_xml gz_path f = let tmpfile, tmpch = From 9d677cdc47021db3ae8235721e7bca226927d4bc Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Thu, 24 Oct 2024 16:25:49 +0800 Subject: [PATCH 003/117] CP-50789: Enable verified rpc to external host Now xapi supports setting up rpc to hosts in the pool and appliances, while for syncing updates from remote_pool type repository, we need to set up rpc to remote coordinator with its certificate verified. Add util Helpers.make_external_host_verified_rpc, which will set up a secure connection to the external host (host outside the pool) with its host certificate verified.
Signed-off-by: Gang Ji --- ocaml/libs/stunnel/stunnel.ml | 3 +++ ocaml/libs/stunnel/stunnel.mli | 2 ++ ocaml/libs/stunnel/stunnel_client.ml | 11 ++++++++--- ocaml/libs/stunnel/stunnel_client.mli | 2 ++ ocaml/xapi/helpers.ml | 10 ++++++++++ 5 files changed, 25 insertions(+), 3 deletions(-) diff --git a/ocaml/libs/stunnel/stunnel.ml b/ocaml/libs/stunnel/stunnel.ml index 8d319b4b80d..3d1c10972db 100644 --- a/ocaml/libs/stunnel/stunnel.ml +++ b/ocaml/libs/stunnel/stunnel.ml @@ -157,6 +157,9 @@ let pool = ; cert_bundle_path= "/etc/stunnel/xapi-pool-ca-bundle.pem" } +let external_host ext_host_cert_file = + {sni= None; verify= VerifyPeer; cert_bundle_path= ext_host_cert_file} + let debug_conf_of_bool verbose : string = if verbose then "debug=authpriv.7" else "debug=authpriv.5" diff --git a/ocaml/libs/stunnel/stunnel.mli b/ocaml/libs/stunnel/stunnel.mli index eba084a9ef2..de607816cb0 100644 --- a/ocaml/libs/stunnel/stunnel.mli +++ b/ocaml/libs/stunnel/stunnel.mli @@ -59,6 +59,8 @@ val appliance : verification_config val pool : verification_config +val external_host : string -> verification_config + val with_connect : ?unique_id:int -> ?use_fork_exec_helper:bool diff --git a/ocaml/libs/stunnel/stunnel_client.ml b/ocaml/libs/stunnel/stunnel_client.ml index 4f755d11a8f..b6329a0e041 100644 --- a/ocaml/libs/stunnel/stunnel_client.ml +++ b/ocaml/libs/stunnel/stunnel_client.ml @@ -26,7 +26,12 @@ let set_verify_by_default = function D.info "enabling default tls verification" ; verify := true -let pool () = match !verify with true -> Some Stunnel.pool | false -> None +let get_verification_config config = + match !verify with true -> Some config | false -> None -let appliance () = - match !verify with true -> Some Stunnel.appliance | false -> None +let pool () = get_verification_config Stunnel.pool + +let appliance () = get_verification_config Stunnel.appliance + +let external_host cert_file = + Stunnel.external_host cert_file |> get_verification_config diff --git 
a/ocaml/libs/stunnel/stunnel_client.mli b/ocaml/libs/stunnel/stunnel_client.mli index 2897d104494..b3dd0392bf4 100644 --- a/ocaml/libs/stunnel/stunnel_client.mli +++ b/ocaml/libs/stunnel/stunnel_client.mli @@ -19,3 +19,5 @@ val set_verify_by_default : bool -> unit val pool : unit -> Stunnel.verification_config option val appliance : unit -> Stunnel.verification_config option + +val external_host : string -> Stunnel.verification_config option diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index 30965068f3f..a484f95b700 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -2015,6 +2015,16 @@ let with_temp_out_ch_of_temp_file ?mode prefix suffix f = let@ path, channel = with_temp_file ?mode prefix suffix in f (path, channel |> with_temp_out_ch) +let make_external_host_verified_rpc ~__context ext_host_address ext_host_cert + xml = + let@ temp_file, temp_out_ch = with_temp_file "external-host-cert" ".pem" in + Xapi_stdext_pervasives.Pervasiveext.finally + (fun () -> output_string temp_out_ch ext_host_cert) + (fun () -> close_out temp_out_ch) ; + make_remote_rpc ~__context + ~verify_cert:(Stunnel_client.external_host temp_file) + ext_host_address xml + module FileSys : sig (* bash-like interface for manipulating files *) type path = string From 30ce0dbe569d0fa8faabcdac793636f270065c06 Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Tue, 29 Oct 2024 01:27:34 +0000 Subject: [PATCH 004/117] CP-51836: Restrict/check binary_url of remote_pool repository Add an assertion to restrict `binary_url` of remote_pool repository to be in the format of `https:///repository/enabled`. 
Signed-off-by: Bengang Yuan --- ocaml/xapi-consts/constants.ml | 3 +++ ocaml/xapi/repository.ml | 1 + ocaml/xapi/repository_helpers.ml | 12 ++++++++++++ 3 files changed, 16 insertions(+) diff --git a/ocaml/xapi-consts/constants.ml b/ocaml/xapi-consts/constants.ml index 2c7fc49e179..5bc68a61892 100644 --- a/ocaml/xapi-consts/constants.ml +++ b/ocaml/xapi-consts/constants.ml @@ -155,6 +155,9 @@ let get_pool_update_download_uri = "/update/" let get_repository_uri = "/repository" (* ocaml/xapi/repository.ml *) +let get_enabled_repository_uri = + "/repository/enabled" (* ocaml/xapi/repository.ml *) + let get_host_updates_uri = "/host_updates" (* ocaml/xapi/repository.ml *) let get_updates_uri = "/updates" (* ocaml/xapi/repository.ml *) diff --git a/ocaml/xapi/repository.ml b/ocaml/xapi/repository.ml index eef3ac40dc0..fc3d0f93d33 100644 --- a/ocaml/xapi/repository.ml +++ b/ocaml/xapi/repository.ml @@ -69,6 +69,7 @@ let introduce_bundle ~__context ~name_label ~name_description = let introduce_remote_pool ~__context ~name_label ~name_description ~binary_url ~certificate = + assert_remote_pool_url_is_valid ~url:binary_url ; Db.Repository.get_all ~__context |> List.iter (fun ref -> if diff --git a/ocaml/xapi/repository_helpers.ml b/ocaml/xapi/repository_helpers.ml index b032cc335c5..4016a158237 100644 --- a/ocaml/xapi/repository_helpers.ml +++ b/ocaml/xapi/repository_helpers.ml @@ -231,6 +231,18 @@ let assert_gpgkey_path_is_valid path = raise Api_errors.(Server_error (invalid_gpgkey_path, [path])) ) +let assert_remote_pool_url_is_valid ~url = + let uri = Uri.of_string url in + match (Uri.scheme uri, Uri.host uri, Uri.path uri) with + | Some "https", Some host, path + when path = Constants.get_enabled_repository_uri + && Helpers.is_valid_ip `ipv4or6 host -> + () + | _ -> + error "Invalid url: %s, expected url format: %s" url + ("https://" ^ Constants.get_enabled_repository_uri) ; + raise Api_errors.(Server_error (invalid_base_url, [url])) + let with_pool_repositories f = 
Xapi_stdext_pervasives.Pervasiveext.finally (fun () -> From 645e98cbdbd4288e61cf1496057bf40d38dad7f0 Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Tue, 29 Oct 2024 01:38:34 +0000 Subject: [PATCH 005/117] CP-51836: UT for restrict/check binary_url Signed-off-by: Bengang Yuan --- ocaml/tests/test_repository.ml | 68 +++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/ocaml/tests/test_repository.ml b/ocaml/tests/test_repository.ml index 59008a61272..a2f17046fce 100644 --- a/ocaml/tests/test_repository.ml +++ b/ocaml/tests/test_repository.ml @@ -21,6 +21,7 @@ let test_introduce_duplicate_repo_name () = let name_description_1 = "description1" in let binary_url = "https://repo.example.com" in let binary_url_1 = "https://repo1.example.com" in + let binary_url_2 = "https://1.1.1.1/repository/enabled" in let source_url = "https://repo-src.example.com" in let source_url_1 = "https://repo-src1.example.com" in let gpgkey_path = "" in @@ -42,6 +43,14 @@ let test_introduce_duplicate_repo_name () = Repository.introduce_bundle ~__context ~name_label ~name_description:name_description_1 |> ignore + ) ; + Alcotest.check_raises "test_introduce_duplicate_repo_name_3" + Api_errors.(Server_error (repository_already_exists, [Ref.string_of ref])) + (fun () -> + Repository.introduce_remote_pool ~__context ~name_label + ~name_description:name_description_1 ~binary_url:binary_url_2 + ~certificate:"" + |> ignore ) let test_introduce_duplicate_binary_url () = @@ -58,7 +67,7 @@ let test_introduce_duplicate_binary_url () = Repository.introduce ~__context ~name_label ~name_description ~binary_url ~source_url ~update:true ~gpgkey_path in - Alcotest.check_raises "test_introduce_duplicate_binary_url" + Alcotest.check_raises "test_introduce_duplicate_binary_url_1" Api_errors.(Server_error (repository_already_exists, [Ref.string_of ref])) (fun () -> Repository.introduce ~__context ~binary_url ~name_label:name_label_1 @@ -110,6 +119,59 @@ let 
test_introduce_duplicate_bundle_repo () = |> ignore ) +let test_introduce_invalid_remote_pool_repo_url () = + let __context = T.make_test_database () in + let name_label = "name" in + let name_description = "description" in + let invalid_url_1 = "http://1.1.1.1/repository/enabled" in + let invalid_url_2 = "https://1.1.1.257/repository/enabled" in + let invalid_url_3 = "https://test.com/repository/enabled" in + let invalid_url_4 = "https://1.1.1.1/other" in + let invalid_url_5 = "https://1.1.1.1" in + let invalid_url_6 = "non-url" in + Alcotest.check_raises "test_introduce_invalid_remote_pool_repo_url_1" + Api_errors.(Server_error (invalid_base_url, [invalid_url_1])) + (fun () -> + Repository.introduce_remote_pool ~__context ~name_label ~name_description + ~binary_url:invalid_url_1 ~certificate:"" + |> ignore + ) ; + Alcotest.check_raises "test_introduce_invalid_remote_pool_repo_url_2" + Api_errors.(Server_error (invalid_base_url, [invalid_url_2])) + (fun () -> + Repository.introduce_remote_pool ~__context ~name_label ~name_description + ~binary_url:invalid_url_2 ~certificate:"" + |> ignore + ) ; + Alcotest.check_raises "test_introduce_invalid_remote_pool_repo_url_3" + Api_errors.(Server_error (invalid_base_url, [invalid_url_3])) + (fun () -> + Repository.introduce_remote_pool ~__context ~name_label ~name_description + ~binary_url:invalid_url_3 ~certificate:"" + |> ignore + ) ; + Alcotest.check_raises "test_introduce_invalid_remote_pool_repo_url_4" + Api_errors.(Server_error (invalid_base_url, [invalid_url_4])) + (fun () -> + Repository.introduce_remote_pool ~__context ~name_label ~name_description + ~binary_url:invalid_url_4 ~certificate:"" + |> ignore + ) ; + Alcotest.check_raises "test_introduce_invalid_remote_pool_repo_url_5" + Api_errors.(Server_error (invalid_base_url, [invalid_url_5])) + (fun () -> + Repository.introduce_remote_pool ~__context ~name_label ~name_description + ~binary_url:invalid_url_5 ~certificate:"" + |> ignore + ) ; + Alcotest.check_raises 
"test_introduce_invalid_remote_pool_repo_url_6" Api_errors.(Server_error (invalid_base_url, [invalid_url_6])) (fun () -> Repository.introduce_remote_pool ~__context ~name_label ~name_description ~binary_url:invalid_url_6 ~certificate:"" |> ignore ) let test = [ ( "test_introduce_duplicate_repo_name" @@ -128,6 +190,10 @@ let test = , `Quick , test_introduce_duplicate_bundle_repo ) + ; ( "test_introduce_invalid_remote_pool_repo_url" , `Quick , test_introduce_invalid_remote_pool_repo_url ) ] let () = From 1dc89037bbc1d92e07b6cb8e3116db80577ccdc4 Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Fri, 25 Oct 2024 09:29:26 +0800 Subject: [PATCH 006/117] CP-51391: Implement handling for /repository/enabled Add handler for `/repository/enabled`. Replace `/enabled` with the current enabled repository. Signed-off-by: Bengang Yuan --- ocaml/idl/datamodel.ml | 10 ++++++- ocaml/xapi/repository.ml | 57 ++++++++++++--------------------------- ocaml/xapi/repository.mli | 3 +++ ocaml/xapi/xapi.ml | 1 + 4 files changed, 30 insertions(+), 41 deletions(-) diff --git a/ocaml/idl/datamodel.ml b/ocaml/idl/datamodel.ml index 5fb25cd26a0..cf08e829fae 100644 --- a/ocaml/idl/datamodel.ml +++ b/ocaml/idl/datamodel.ml @@ -10971,6 +10971,15 @@ let http_actions = ; ( "get_repository" , (Get, Constants.get_repository_uri, false, [], _R_LOCAL_ROOT_ONLY, []) ) + ; ( "get_enabled_repository" , ( Get , Constants.get_enabled_repository_uri , false , [] , _R_POOL_OP ++ _R_CLIENT_CERT , [] ) ) ; ( "get_host_updates" , ( Get , Constants.get_host_updates_uri @@ -11014,7 +11023,6 @@ let public_http_actions_with_no_rbac_check = ; "post_jsonrpc" ; "post_jsonrpc_options" ; "get_pool_update_download" - ; "get_repository" ] (* permissions not associated with any object message or field *) diff --git a/ocaml/xapi/repository.ml b/ocaml/xapi/repository.ml index fc3d0f93d33..630978b3db3 100644 --- a/ocaml/xapi/repository.ml +++ b/ocaml/xapi/repository.ml @@ -514,48 +514,25 @@ let
get_host_updates_in_json ~__context ~installed = let get_repository_handler (req : Http.Request.t) s _ = let open Http in - let open Xapi_stdext_std.Xstringext in - debug "Repository.get_repository_handler URL %s" req.Request.uri ; + debug "%s URL: %s" __FUNCTION__ req.Request.uri ; req.Request.close <- true ; - if Fileserver.access_forbidden req s then - Http_svr.response_forbidden ~req s - else - let can_be_authorized = - try - Xapi_http.with_context "get_repository_handler" req s (fun _ -> ()) ; - true - with _ -> false - in - let internal_repo_access_only = - let __context = - Context.make ~origin:(Http (req, s)) "get_repository_handler" + Fileserver.send_file Constants.get_repository_uri + !Xapi_globs.local_pool_repo_dir + req s () + +let get_enabled_repository_handler (req : Http.Request.t) s _ = + let open Http in + debug "%s URL: %s" __FUNCTION__ req.Request.uri ; + req.Request.close <- true ; + Xapi_http.with_context __FUNCTION__ req s (fun __context -> + let enabled_repo = get_single_enabled_update_repository ~__context in + let repo_name = + get_remote_repository_name ~__context ~self:enabled_repo in - Pool_features.is_enabled ~__context Features.Internal_repo_access - in - match (can_be_authorized, internal_repo_access_only) with - | false, true -> - error - "Invalid secret for authorization when Internal_repo_access is \ - enabled" ; - Http_svr.response_forbidden ~req s - | _ -> ( - try - let len = String.length Constants.get_repository_uri in - match String.sub_to_end req.Request.uri len with - | uri_path -> - let root = !Xapi_globs.local_pool_repo_dir in - Fileserver.response_file s (Helpers.resolve_uri_path ~root ~uri_path) - | exception e -> - let msg = - Printf.sprintf "Failed to get path from uri': %s" - (ExnHelper.string_of_exn e) - in - raise Api_errors.(Server_error (internal_error, [msg])) - with e -> - error "Failed to serve for request on uri %s: %s" req.Request.uri - (ExnHelper.string_of_exn e) ; - Http_svr.response_forbidden ~req s - ) + 
Fileserver.send_file Constants.get_enabled_repository_uri + (Filename.concat !Xapi_globs.local_pool_repo_dir repo_name) + req s () + ) let consolidate_updates_of_hosts ~repository_name ~updates_info ~hosts = Hashtbl.fold diff --git a/ocaml/xapi/repository.mli b/ocaml/xapi/repository.mli index a454adfc187..b9db7336d29 100644 --- a/ocaml/xapi/repository.mli +++ b/ocaml/xapi/repository.mli @@ -55,6 +55,9 @@ val create_pool_repository : val get_repository_handler : Http.Request.t -> Unix.file_descr -> 'a -> unit +val get_enabled_repository_handler : + Http.Request.t -> Unix.file_descr -> 'a -> unit + val get_host_updates_in_json : __context:Context.t -> installed:bool -> Yojson.Basic.t diff --git a/ocaml/xapi/xapi.ml b/ocaml/xapi/xapi.ml index ed6323663e3..b49c5f77478 100644 --- a/ocaml/xapi/xapi.ml +++ b/ocaml/xapi/xapi.ml @@ -789,6 +789,7 @@ let master_only_http_handlers = ("post_remote_db_access", remote_database_access_handler) ; ("post_remote_db_access_v2", remote_database_access_handler_v2) ; ("get_repository", Repository.get_repository_handler) + ; ("get_enabled_repository", Repository.get_enabled_repository_handler) ; ("get_updates", Xapi_pool.get_updates_handler) ] From 5afb12760545fce54bd09a1fb4ea24fac6b70152 Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Wed, 30 Oct 2024 07:40:38 +0000 Subject: [PATCH 007/117] CP-51988: Fix functions not work for remote_pool repo 1. `remote_pool` repo doesn't support periodic sync updates. 2. Periodic sync updates should be auto-disabled when calling `set_repositories` and `add_repository` for `remote_pool` repo. 3. If `remote_pool` repository is enabled, it should be the single one enabled. 
Signed-off-by: Bengang Yuan --- ocaml/idl/datamodel_errors.ml | 16 ++++++-- ocaml/xapi-consts/api_errors.ml | 6 ++- ocaml/xapi/xapi_pool.ml | 71 +++++++++++++++++++++++---------- 3 files changed, 66 insertions(+), 27 deletions(-) diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml index aead3e0abc4..0258785cafc 100644 --- a/ocaml/idl/datamodel_errors.ml +++ b/ocaml/idl/datamodel_errors.ml @@ -1905,11 +1905,19 @@ let _ = error Api_errors.bundle_repo_not_enabled [] ~doc:"Cannot sync bundle as the bundle repository is not enabled." () ; error Api_errors.can_not_sync_updates [] - ~doc:"Cannot sync updates as the bundle repository is enabled." () ; - error Api_errors.bundle_repo_should_be_single_enabled [] ~doc: - "If the bundle repository is enabled, it should be the only one enabled \ - repository of the pool." + "The currently enabled repositories do not support synchronization of \ + updates." + () ; + error Api_errors.can_not_periodic_sync_updates [] + ~doc: + "The currently enabled repositories do not support periodic automatic \ + updates." + () ; + error Api_errors.repo_should_be_single_one_enabled ["repo_types"] + ~doc: + "If the bundle repository or remote_pool repository is enabled, it \ + should be the only one enabled repository of the pool." () ; error Api_errors.repository_is_in_use [] ~doc:"The repository is in use." 
() ; error Api_errors.repository_cleanup_failed [] diff --git a/ocaml/xapi-consts/api_errors.ml b/ocaml/xapi-consts/api_errors.ml index 97880cde57a..bcd0f18b3e3 100644 --- a/ocaml/xapi-consts/api_errors.ml +++ b/ocaml/xapi-consts/api_errors.ml @@ -1322,8 +1322,10 @@ let bundle_repo_not_enabled = add_error "BUNDLE_REPO_NOT_ENABLED" let can_not_sync_updates = add_error "CAN_NOT_SYNC_UPDATES" -let bundle_repo_should_be_single_enabled = - add_error "BUNDLE_REPO_SHOULD_BE_SINGLE_ENABLED" +let can_not_periodic_sync_updates = add_error "CAN_NOT_PERIODIC_SYNC_UPDATES" + +let repo_should_be_single_one_enabled = + add_error "REPO_SHOULD_BE_SINGLE_ONE_ENABLED" let repository_is_in_use = add_error "REPOSITORY_IS_IN_USE" diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 7a6aaa2101a..12d817c187d 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -3406,21 +3406,50 @@ let enable_tls_verification ~__context = | Some self -> Xapi_cluster_host.set_tls_config ~__context ~self ~verify:true -let contains_bundle_repo ~__context ~repos = - List.exists - (fun repo -> Db.Repository.get_origin ~__context ~self:repo = `bundle) +let assert_single_repo_can_be_enabled ~__context ~repos = + let origins = repos + |> List.filter_map (fun repo -> + match Db.Repository.get_origin ~__context ~self:repo with + | (`bundle | `remote_pool) as origin -> + Some origin + | `remote -> + None + ) + |> List.fold_left + (fun acc origin -> if List.mem origin acc then acc else origin :: acc) + [] + in + match (repos, origins) with + | _ :: _ :: _, _ :: _ -> + raise + Api_errors.( + Server_error + ( repo_should_be_single_one_enabled + , origins |> List.map Record_util.origin_to_string + ) + ) + | _, _ -> + () -let assert_single_bundle_repo_can_be_enabled ~__context ~repos = - if List.length repos > 1 && contains_bundle_repo ~__context ~repos then - raise Api_errors.(Server_error (bundle_repo_should_be_single_enabled, [])) +let assert_can_sync_updates ~__context ~repos = + 
List.iter + (fun repo -> + match Db.Repository.get_origin ~__context ~self:repo with + | `remote | `remote_pool -> + () + | `bundle -> + raise Api_errors.(Server_error (can_not_sync_updates, [])) + ) + repos -let assert_not_bundle_repo ~__context ~repos = - if contains_bundle_repo ~__context ~repos then - raise Api_errors.(Server_error (can_not_sync_updates, [])) +let can_periodic_sync_updates ~__context ~repos = + List.for_all + (fun repo -> Db.Repository.get_origin ~__context ~self:repo = `remote) + repos -let disable_auto_update_sync_for_bundle_repo ~__context ~self ~repos = - if contains_bundle_repo ~__context ~repos then ( +let disable_unsupported_periodic_sync_updates ~__context ~self ~repos = + if not (can_periodic_sync_updates ~__context ~repos) then ( Pool_periodic_update_sync.set_enabled ~__context ~value:false ; Db.Pool.set_update_sync_enabled ~__context ~self ~value:false ) @@ -3429,7 +3458,7 @@ let set_repositories ~__context ~self ~value = Xapi_pool_helpers.with_pool_operation ~__context ~self ~doc:"pool.set_repositories" ~op:`configure_repositories @@ fun () -> - assert_single_bundle_repo_can_be_enabled ~__context ~repos:value ; + assert_single_repo_can_be_enabled ~__context ~repos:value ; let existings = Db.Pool.get_repositories ~__context ~self in (* To be removed *) List.iter @@ -3453,7 +3482,7 @@ let set_repositories ~__context ~self ~value = Db.Pool.set_repositories ~__context ~self ~value ; if Db.Pool.get_repositories ~__context ~self = [] then Db.Pool.set_last_update_sync ~__context ~self ~value:Date.epoch ; - disable_auto_update_sync_for_bundle_repo ~__context ~self ~repos:value + disable_unsupported_periodic_sync_updates ~__context ~self ~repos:value let add_repository ~__context ~self ~value = Xapi_pool_helpers.with_pool_operation ~__context ~self @@ -3461,15 +3490,14 @@ let add_repository ~__context ~self ~value = @@ fun () -> let existings = Db.Pool.get_repositories ~__context ~self in if not (List.mem value existings) then ( - 
assert_single_bundle_repo_can_be_enabled ~__context - ~repos:(value :: existings) ; + assert_single_repo_can_be_enabled ~__context ~repos:(value :: existings) ; Db.Pool.add_repositories ~__context ~self ~value ; Db.Repository.set_hash ~__context ~self:value ~value:"" ; Repository.reset_updates_in_cache () ; - Db.Pool.set_last_update_sync ~__context ~self ~value:Date.epoch - ) ; - disable_auto_update_sync_for_bundle_repo ~__context ~self - ~repos:(value :: existings) + Db.Pool.set_last_update_sync ~__context ~self ~value:Date.epoch ; + disable_unsupported_periodic_sync_updates ~__context ~self + ~repos:(value :: existings) + ) let remove_repository ~__context ~self ~value = Xapi_pool_helpers.with_pool_operation ~__context ~self @@ -3508,7 +3536,7 @@ let sync_updates ~__context ~self ~force ~token ~token_id = ~doc:"pool.sync_updates" ~op:`sync_updates @@ fun () -> let repos = Repository_helpers.get_enabled_repositories ~__context in - assert_not_bundle_repo ~__context ~repos ; + assert_can_sync_updates ~__context ~repos ; sync_repos ~__context ~self ~repos ~force ~token ~token_id let check_update_readiness ~__context ~self:_ ~requires_reboot = @@ -3793,7 +3821,8 @@ let set_update_sync_enabled ~__context ~self ~value = repositories." 
; raise Api_errors.(Server_error (no_repositories_configured, [])) | repos -> - assert_not_bundle_repo ~__context ~repos + if not (can_periodic_sync_updates ~__context ~repos) then + raise Api_errors.(Server_error (can_not_periodic_sync_updates, [])) ) ; Pool_periodic_update_sync.set_enabled ~__context ~value ; Db.Pool.set_update_sync_enabled ~__context ~self ~value From 29e1fe4e25622a8ee2fb1f87c41a3a28ed9d7947 Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Wed, 30 Oct 2024 17:13:24 +0800 Subject: [PATCH 008/117] CP-51988: UT for fix functions not work for remote_pool repo Signed-off-by: Bengang Yuan --- ocaml/tests/test_pool_repository.ml | 197 +++++++++++++++++----------- 1 file changed, 117 insertions(+), 80 deletions(-) diff --git a/ocaml/tests/test_pool_repository.ml b/ocaml/tests/test_pool_repository.ml index bdfcc314e20..4d0bdb45ee6 100644 --- a/ocaml/tests/test_pool_repository.ml +++ b/ocaml/tests/test_pool_repository.ml @@ -14,101 +14,138 @@ module T = Test_common -let test_set_remote_and_bundle_repos () = +let on_repositories f = let __context = T.make_test_database () in - let name_label = "remote" in - let name_description = "remote" in - let binary_url = "https://repo.example.com" in + let pool = Helpers.get_pool ~__context in + let binary_url_1 = "https://repo.example.com" in + let binary_url_2 = "https://1.1.1.1/repository/enabled" in let source_url = "https://repo-src.example.com" in - let gpgkey_path = "" in let ref_remote = - Repository.introduce ~__context ~name_label ~name_description ~binary_url - ~source_url ~update:true ~gpgkey_path + Repository.introduce ~__context ~name_label:"remote" + ~name_description:"remote" ~binary_url:binary_url_1 ~source_url + ~update:true ~gpgkey_path:"" in let ref_bundle = Repository.introduce_bundle ~__context ~name_label:"bundle" ~name_description:"bundle" in - let self = Helpers.get_pool ~__context in - Alcotest.check_raises "test_set_remote_and_bundle_repos" - Api_errors.(Server_error 
(bundle_repo_should_be_single_enabled, [])) - (fun () -> - Xapi_pool.set_repositories ~__context ~self - ~value:[ref_remote; ref_bundle] - ) - -let test_add_bundle_repo () = - let __context = T.make_test_database () in - let name_label = "remote" in - let name_description = "remote" in - let binary_url = "https://repo.example.com" in - let source_url = "https://repo-src.example.com" in - let gpgkey_path = "" in - let ref_remote = - Repository.introduce ~__context ~name_label ~name_description ~binary_url - ~source_url ~update:true ~gpgkey_path + let ref_remote_pool = + Repository.introduce_remote_pool ~__context ~name_label:"remote_pool" + ~binary_url:binary_url_2 ~name_description:"remote_pool" ~certificate:"" in - let ref_bundle = - Repository.introduce_bundle ~__context ~name_label:"bundle" - ~name_description:"bundle" - in - let self = Helpers.get_pool ~__context in - Alcotest.check_raises "test_add_bundle_repo" - Api_errors.(Server_error (bundle_repo_should_be_single_enabled, [])) - (fun () -> - Xapi_pool.set_repositories ~__context ~self ~value:[ref_remote] ; - Xapi_pool.add_repository ~__context ~self ~value:ref_bundle - ) + f __context pool ref_remote ref_bundle ref_remote_pool -let test_add_remote_repo () = - let __context = T.make_test_database () in - let name_label = "remote" in - let name_description = "remote" in - let binary_url = "https://repo.example.com" in - let source_url = "https://repo-src.example.com" in - let gpgkey_path = "" in - let ref_remote = - Repository.introduce ~__context ~name_label ~name_description ~binary_url - ~source_url ~update:true ~gpgkey_path - in - let ref_bundle = - Repository.introduce_bundle ~__context ~name_label:"bundle" - ~name_description:"bundle" - in - let self = Helpers.get_pool ~__context in - Alcotest.check_raises "test_add_remote_repo" - Api_errors.(Server_error (bundle_repo_should_be_single_enabled, [])) - (fun () -> +let test_set_repositories () = + on_repositories (fun __context self ref_remote ref_bundle 
ref_remote_pool -> + Xapi_pool.set_repositories ~__context ~self ~value:[ref_remote] ; Xapi_pool.set_repositories ~__context ~self ~value:[ref_bundle] ; - Xapi_pool.add_repository ~__context ~self ~value:ref_remote - ) + Xapi_pool.set_repositories ~__context ~self ~value:[ref_remote_pool] ; + Alcotest.check_raises "test_set_repositories_1" + Api_errors.( + Server_error + ( repo_should_be_single_one_enabled + , [Record_util.origin_to_string `bundle] + ) + ) + (fun () -> + Xapi_pool.set_repositories ~__context ~self + ~value:[ref_remote; ref_bundle] + ) ; + Alcotest.check_raises "test_set_repositories_2" + Api_errors.( + Server_error + ( repo_should_be_single_one_enabled + , [`bundle; `remote_pool] |> List.map Record_util.origin_to_string + ) + ) + (fun () -> + Xapi_pool.set_repositories ~__context ~self + ~value:[ref_remote_pool; ref_bundle] + ) ; + Alcotest.check_raises "test_set_repositories_3" + Api_errors.( + Server_error + ( repo_should_be_single_one_enabled + , [Record_util.origin_to_string `remote_pool] + ) + ) + (fun () -> + Xapi_pool.set_repositories ~__context ~self + ~value:[ref_remote; ref_remote_pool] + ) + ) -let test_can_not_enable_bundle_repo_auto_sync () = - let __context = T.make_test_database () in - let ref_bundle = - Repository.introduce_bundle ~__context ~name_label:"bundle" - ~name_description:"bundle" - in - let self = Helpers.get_pool ~__context in - Alcotest.check_raises "test_can_not_enable_bundle_repo_auto_sync" - Api_errors.(Server_error (can_not_sync_updates, [])) - (fun () -> - Xapi_pool.set_repositories ~__context ~self ~value:[ref_bundle] ; - Xapi_pool.set_update_sync_enabled ~__context ~self ~value:true - ) +let test_add_repository () = + on_repositories (fun __context self ref_remote ref_bundle ref_remote_pool -> + Alcotest.check_raises "test_add_repository_1" + Api_errors.( + Server_error + ( repo_should_be_single_one_enabled + , [Record_util.origin_to_string `bundle] + ) + ) + (fun () -> + Xapi_pool.set_repositories ~__context 
~self ~value:[ref_remote] ; + Xapi_pool.add_repository ~__context ~self ~value:ref_bundle + ) ; + Alcotest.check_raises "test_add_repository_2" + Api_errors.( + Server_error + ( repo_should_be_single_one_enabled + , [Record_util.origin_to_string `remote_pool] + ) + ) + (fun () -> + Xapi_pool.set_repositories ~__context ~self ~value:[ref_remote] ; + Xapi_pool.add_repository ~__context ~self ~value:ref_remote_pool + ) ; + Alcotest.check_raises "test_add_repository_3" + Api_errors.( + Server_error + ( repo_should_be_single_one_enabled + , [`remote_pool; `bundle] |> List.map Record_util.origin_to_string + ) + ) + (fun () -> + Xapi_pool.set_repositories ~__context ~self ~value:[ref_remote_pool] ; + Xapi_pool.add_repository ~__context ~self ~value:ref_bundle + ) ; + Alcotest.check_raises "test_add_repository_4" + Api_errors.( + Server_error + ( repo_should_be_single_one_enabled + , [`bundle; `remote_pool] |> List.map Record_util.origin_to_string + ) + ) + (fun () -> + Xapi_pool.set_repositories ~__context ~self ~value:[ref_bundle] ; + Xapi_pool.add_repository ~__context ~self ~value:ref_remote_pool + ) + ) + +let test_enable_periodic_repo_sync () = + on_repositories (fun __context self ref_remote ref_bundle ref_remote_pool -> + Xapi_pool.set_repositories ~__context ~self ~value:[ref_remote] ; + Xapi_pool.set_update_sync_enabled ~__context ~self ~value:true ; + Alcotest.check_raises "test_enable_periodic_repo_sync_1" + Api_errors.(Server_error (can_not_periodic_sync_updates, [])) + (fun () -> + Xapi_pool.set_repositories ~__context ~self ~value:[ref_bundle] ; + Xapi_pool.set_update_sync_enabled ~__context ~self ~value:true + ) ; + Alcotest.check_raises "test_enable_periodic_repo_sync_2" + Api_errors.(Server_error (can_not_periodic_sync_updates, [])) + (fun () -> + Xapi_pool.set_repositories ~__context ~self ~value:[ref_remote_pool] ; + Xapi_pool.set_update_sync_enabled ~__context ~self ~value:true + ) + ) let test = [ - ( "test_set_remote_and_bundle_repos" - , `Quick - , 
test_set_remote_and_bundle_repos - ) - ; ("test_add_bundle_repo", `Quick, test_add_bundle_repo) - ; ("test_add_remote_repo", `Quick, test_add_remote_repo) - ; ( "test_can_not_enable_bundle_repo_auto_sync" - , `Quick - , test_can_not_enable_bundle_repo_auto_sync - ) + ("test_set_repositories", `Quick, test_set_repositories) + ; ("test_add_repository", `Quick, test_add_repository) + ; ("test_enable_periodic_repo_sync", `Quick, test_enable_periodic_repo_sync) ] let () = From 77de0da5c8a12f0b68a0ed648c2fdf6a919493e1 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Wed, 11 Dec 2024 13:55:33 +0800 Subject: [PATCH 009/117] Renaming and formatting Signed-off-by: Gang Ji --- ocaml/xapi/repository.ml | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/ocaml/xapi/repository.ml b/ocaml/xapi/repository.ml index f9af339734b..211cd4ac535 100644 --- a/ocaml/xapi/repository.ml +++ b/ocaml/xapi/repository.ml @@ -223,19 +223,22 @@ let sync ~__context ~self ~token ~token_id = ignore (Helpers.call_script cmd params) ) (fun () -> - (* Rewrite repo conf file as initial content to remove credential related info, - * I.E. proxy username/password and temporary token file path. + (* Rewrite repo conf file as initial content to remove credential + * related info, I.E. proxy username/password and temporary token file + * path. *) write_initial_yum_config () ) ; - (* The custom yum-utils will fully download repository metadata.*) - let repodata_dir = + (* The custom yum-utils will fully download repository metadata including + * the repo gpg signature. 
+ *) + let repo_gpg_signature = !Xapi_globs.local_pool_repo_dir // repo_name // "repodata" // "repomd.xml.asc" in - Sys.file_exists repodata_dir + Sys.file_exists repo_gpg_signature with e -> error "Failed to sync with remote YUM repository: %s" (ExnHelper.string_of_exn e) ; From 8e1bc731bfb4f229a1a5dac01880d349efa775e1 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Tue, 5 Nov 2024 10:28:18 +0800 Subject: [PATCH 010/117] Update repository.binary_url description Signed-off-by: Gang Ji --- ocaml/idl/datamodel_repository.ml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ocaml/idl/datamodel_repository.ml b/ocaml/idl/datamodel_repository.ml index e2a8f03b0c5..d1daa55e85c 100644 --- a/ocaml/idl/datamodel_repository.ml +++ b/ocaml/idl/datamodel_repository.ml @@ -105,7 +105,9 @@ let introduce_remote_pool = ; ( String , "binary_url" , "Base URL of binary packages in the local repository of this remote \ - pool in https:///repository format" + pool in https://" + ^ Constants.get_enabled_repository_uri + ^ " format" ) ; ( String , "certificate" From e2aa82e64e07b4f89c66d6ceaeeb6e5793e5d5b6 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Fri, 25 Oct 2024 13:30:04 +0800 Subject: [PATCH 011/117] CP-50787 CP-51347: Support pool.sync_updates from remote_pool repo When a remote_pool type repository, which points to the enabled repository in the remote pool coordinator, is set as the enabled repository of the pool, updates can be synced from it with API pool.sync_updates. The username password of the remote pool coordinator is required as parameters for pool.sync_updates to login the remote pool. And the remote pool coordinator's host server certificate needs to be configured in the remote_pool repository, it will be used to verify the remote end when sending out username passwords and syncing updates from it. 
A new yum/dnf plugin "xapitoken" is introduced to set xapi token as HTTP cookie: "session_id" for each HTTP request which downloads files from the remote_pool repository. Signed-off-by: Gang Ji --- ocaml/idl/datamodel_errors.ml | 10 ++ ocaml/idl/datamodel_pool.ml | 14 ++ ocaml/xapi-cli-server/cli_frontend.ml | 2 +- ocaml/xapi-cli-server/cli_operations.ml | 3 + ocaml/xapi-consts/api_errors.ml | 6 + ocaml/xapi/helpers.ml | 21 +-- ocaml/xapi/pool_periodic_update_sync.ml | 3 +- ocaml/xapi/repository.ml | 163 +++++++++++++++++------- ocaml/xapi/repository.mli | 2 + ocaml/xapi/repository_helpers.ml | 87 ++++++++++--- ocaml/xapi/xapi_pool.ml | 13 +- ocaml/xapi/xapi_pool.mli | 2 + python3/dnf_plugins/accesstoken.py | 2 +- python3/dnf_plugins/ptoken.py | 2 + python3/dnf_plugins/xapitoken.py | 49 +++++++ python3/tests/test_dnf_plugins.py | 63 ++++++++- scripts/Makefile | 2 + scripts/yum-plugins/accesstoken.py | 8 +- scripts/yum-plugins/ptoken.py | 2 +- scripts/yum-plugins/xapitoken.conf | 2 + scripts/yum-plugins/xapitoken.py | 48 +++++++ 21 files changed, 414 insertions(+), 90 deletions(-) create mode 100644 python3/dnf_plugins/xapitoken.py create mode 100644 scripts/yum-plugins/xapitoken.conf create mode 100644 scripts/yum-plugins/xapitoken.py diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml index d1c3bf0ac0c..1c4f19dacff 100644 --- a/ocaml/idl/datamodel_errors.ml +++ b/ocaml/idl/datamodel_errors.ml @@ -1926,6 +1926,16 @@ let _ = "If the bundle repository or remote_pool repository is enabled, it \ should be the only one enabled repository of the pool." () ; + error Api_errors.update_syncing_remote_pool_coordinator_connection_failed [] + ~doc: + "There was an error connecting to the remote pool coordinator while \ + syncing updates from it." + () ; + error Api_errors.update_syncing_remote_pool_coordinator_service_failed [] + ~doc: + "There was an error connecting to the server while syncing updates from \ + it. 
The service contacted didn't reply properly." + () ; error Api_errors.repository_is_in_use [] ~doc:"The repository is in use." () ; error Api_errors.repository_cleanup_failed [] ~doc:"Failed to clean up local repository on coordinator." () ; diff --git a/ocaml/idl/datamodel_pool.ml b/ocaml/idl/datamodel_pool.ml index ab0d1669788..a86a3ea152c 100644 --- a/ocaml/idl/datamodel_pool.ml +++ b/ocaml/idl/datamodel_pool.ml @@ -1282,6 +1282,20 @@ let sync_updates = ; param_release= numbered_release "1.329.0" ; param_default= Some (VString "") } + ; { + param_type= String + ; param_name= "username" + ; param_doc= "The username of the remote pool" + ; param_release= numbered_release "24.39.0-next" + ; param_default= Some (VString "") + } + ; { + param_type= String + ; param_name= "password" + ; param_doc= "The password of the remote pool" + ; param_release= numbered_release "24.39.0-next" + ; param_default= Some (VString "") + } ] ~result:(String, "The SHA256 hash of updateinfo.xml.gz") ~allowed_roles:(_R_POOL_OP ++ _R_CLIENT_CERT) diff --git a/ocaml/xapi-cli-server/cli_frontend.ml b/ocaml/xapi-cli-server/cli_frontend.ml index c4a5a4a5dc2..4b5fa9476ae 100644 --- a/ocaml/xapi-cli-server/cli_frontend.ml +++ b/ocaml/xapi-cli-server/cli_frontend.ml @@ -511,7 +511,7 @@ let rec cmdtable_data : (string * cmd_spec) list = ; ( "pool-sync-updates" , { reqd= [] - ; optn= ["force"; "token"; "token-id"] + ; optn= ["force"; "token"; "token-id"; "username"; "password"] ; help= "Sync updates from remote YUM repository, pool-wide." 
; implementation= No_fd Cli_operations.pool_sync_updates ; flags= [] diff --git a/ocaml/xapi-cli-server/cli_operations.ml b/ocaml/xapi-cli-server/cli_operations.ml index 62a655b9564..8c9a1dbaf2b 100644 --- a/ocaml/xapi-cli-server/cli_operations.ml +++ b/ocaml/xapi-cli-server/cli_operations.ml @@ -1833,8 +1833,11 @@ let pool_sync_updates printer rpc session_id params = let force = get_bool_param params "force" in let token = get_param params "token" ~default:"" in let token_id = get_param params "token-id" ~default:"" in + let username = get_param params "username" ~default:"" in + let password = get_param params "password" ~default:"" in let hash = Client.Pool.sync_updates ~rpc ~session_id ~self:pool ~force ~token ~token_id + ~username ~password in printer (Cli_printer.PList [hash]) diff --git a/ocaml/xapi-consts/api_errors.ml b/ocaml/xapi-consts/api_errors.ml index 6e9b7fdbe06..04912fb4932 100644 --- a/ocaml/xapi-consts/api_errors.ml +++ b/ocaml/xapi-consts/api_errors.ml @@ -1330,6 +1330,12 @@ let can_not_periodic_sync_updates = add_error "CAN_NOT_PERIODIC_SYNC_UPDATES" let repo_should_be_single_one_enabled = add_error "REPO_SHOULD_BE_SINGLE_ONE_ENABLED" +let update_syncing_remote_pool_coordinator_connection_failed = + add_error "UPDATE_SYNCING_REMOTE_POOL_COORDINATOR_CONNECTION_FAILED" + +let update_syncing_remote_pool_coordinator_service_failed = + add_error "UPDATE_SYNCING_REMOTE_POOL_COORDINATOR_SERVICE_FAILED" + let repository_is_in_use = add_error "REPOSITORY_IS_IN_USE" let repository_cleanup_failed = add_error "REPOSITORY_CLEANUP_FAILED" diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index aff7f383e46..6bd81a84fa1 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -2030,19 +2030,24 @@ let with_temp_file ?mode prefix suffix f = let path, channel = Filename.open_temp_file ?mode prefix suffix in finally (fun () -> f (path, channel)) (fun () -> Unix.unlink path) +let with_temp_file_of_content ?mode prefix suffix content f = + let@ 
temp_file, temp_out_ch = with_temp_file ?mode prefix suffix in + Xapi_stdext_pervasives.Pervasiveext.finally + (fun () -> output_string temp_out_ch content) + (fun () -> close_out temp_out_ch) ; + f temp_file + let with_temp_out_ch_of_temp_file ?mode prefix suffix f = let@ path, channel = with_temp_file ?mode prefix suffix in f (path, channel |> with_temp_out_ch) -let make_external_host_verified_rpc ~__context ext_host_address ext_host_cert - xml = - let@ temp_file, temp_out_ch = with_temp_file "external-host-cert" ".pem" in - Xapi_stdext_pervasives.Pervasiveext.finally - (fun () -> output_string temp_out_ch ext_host_cert) - (fun () -> close_out temp_out_ch) ; +let make_external_host_verified_rpc ~__context host_address host_cert xml = + let@ cert_file = + with_temp_file_of_content "external-host-cert-" ".pem" host_cert + in make_remote_rpc ~__context - ~verify_cert:(Stunnel_client.external_host temp_file) - ext_host_address xml + ~verify_cert:(Stunnel_client.external_host cert_file) + host_address xml module FileSys : sig (* bash-like interface for manipulating files *) diff --git a/ocaml/xapi/pool_periodic_update_sync.ml b/ocaml/xapi/pool_periodic_update_sync.ml index a9755d0cf1e..07bb44965d7 100644 --- a/ocaml/xapi/pool_periodic_update_sync.ml +++ b/ocaml/xapi/pool_periodic_update_sync.ml @@ -140,7 +140,8 @@ let rec update_sync () = ignore (Client.Pool.sync_updates ~rpc ~session_id ~self:(Helpers.get_pool ~__context) - ~force:false ~token:"" ~token_id:"" + ~force:false ~token:"" ~token_id:"" ~username:"" + ~password:"" ) with e -> let exc = Printexc.to_string e in diff --git a/ocaml/xapi/repository.ml b/ocaml/xapi/repository.ml index 211cd4ac535..8e43d72ecba 100644 --- a/ocaml/xapi/repository.ml +++ b/ocaml/xapi/repository.ml @@ -149,26 +149,71 @@ let get_proxy_params ~__context repo_name = | _ -> ("", "", "") -let sync ~__context ~self ~token ~token_id = +let sync ~__context ~self ~token ~token_id ~username ~password = try let repo_name = 
get_remote_repository_name ~__context ~self in remove_repo_conf_file repo_name ; - let binary_url, source_url = - match Db.Repository.get_origin ~__context ~self with + let origin = Db.Repository.get_origin ~__context ~self in + + let binary_url, source_url, use_proxy, client_auth, server_auth = + match origin with | `remote -> + let plugin = "accesstoken" in ( Db.Repository.get_binary_url ~__context ~self , Some (Db.Repository.get_source_url ~__context ~self) + , true + , CdnTokenAuth {token_id; token; plugin} + , DefaultAuth ) | `bundle -> let uri = Uri.make ~scheme:"file" ~path:!Xapi_globs.bundle_repository_dir () in - (Uri.to_string uri, None) + (Uri.to_string uri, None, false, NoAuth, NoAuth) | `remote_pool -> - (* TODO: sync with Stunnel.with_client_proxy as otherwise yum - reposync will fail when checking the self signed certificate on - the remote pool. *) - ("", None) + let cert = Db.Repository.get_certificate ~__context ~self in + let repo_binary_url = Db.Repository.get_binary_url ~__context ~self in + let remote_addr = + repo_binary_url |> Repository_helpers.get_remote_pool_coordinator_ip + in + let verified_rpc = + try + Helpers.make_external_host_verified_rpc ~__context remote_addr + cert + with Xmlrpc_client.Connection_reset -> + raise + (Api_errors.Server_error + ( Api_errors + .update_syncing_remote_pool_coordinator_connection_failed + , [] + ) + ) + in + let session_id = + try + Client.Client.Session.login_with_password ~rpc:verified_rpc + ~uname:username ~pwd:password + ~version:Datamodel_common.api_version_string + ~originator:Xapi_version.xapi_user_agent + with + | Http_client.Http_request_rejected _ | Http_client.Http_error _ -> + raise + (Api_errors.Server_error + ( Api_errors + .update_syncing_remote_pool_coordinator_service_failed + , [] + ) + ) + in + let xapi_token = session_id |> Ref.string_of in + let plugin = "xapitoken" in + ( repo_binary_url + , None + , true + , PoolExtHostAuth {xapi_token; plugin} + , StunnelClientProxyAuth + {cert; 
remote_addr; remote_port= Constants.default_ssl_port} + ) in let gpgkey_path = match Db.Repository.get_gpgkey_path ~__context ~self with @@ -177,57 +222,78 @@ let sync ~__context ~self ~token ~token_id = | s -> s in - let write_initial_yum_config () = + let write_initial_yum_config ~binary_url = write_yum_config ~source_url ~binary_url ~repo_gpgcheck:true ~gpgkey_path ~repo_name in - write_initial_yum_config () ; - clean_yum_cache repo_name ; - (* Remove imported YUM repository GPG key *) - if Pkgs.manager = Yum then - Xapi_stdext_unix.Unixext.rm_rec (get_repo_config repo_name "gpgdir") ; Xapi_stdext_pervasives.Pervasiveext.finally (fun () -> - with_access_token ~token ~token_id @@ fun token_path -> - (* Configure proxy and token *) - let token_param = - match token_path with - | Some p -> - Printf.sprintf "--setopt=%s.accesstoken=file://%s" repo_name p - | None -> - "" - in - let proxy_url_param, proxy_username_param, proxy_password_param = - get_proxy_params ~__context repo_name - in - let Pkg_mgr.{cmd; params} = - [ - "--save" - ; proxy_url_param - ; proxy_username_param - ; proxy_password_param - ; token_param - ] - |> fun config -> Pkgs.config_repo ~repo_name ~config + let config_repo params = + let Pkg_mgr.{cmd; params} = + "--save" :: params |> fun config -> + Pkgs.config_repo ~repo_name ~config + in + ignore (Helpers.call_script ~log_output:Helpers.On_failure cmd params) in - ignore (Helpers.call_script ~log_output:Helpers.On_failure cmd params) ; - (* Import YUM repository GPG key to check metadata in reposync *) - let Pkg_mgr.{cmd; params} = Pkgs.make_cache ~repo_name in - ignore (Helpers.call_script cmd params) ; + let make_cache () = + (* Import YUM repository GPG key to check metadata in reposync *) + let Pkg_mgr.{cmd; params} = Pkgs.make_cache ~repo_name in + ignore (Helpers.call_script cmd params) + in (* Sync with remote repository *) - let Pkg_mgr.{cmd; params} = Pkgs.sync_repo ~repo_name in - Unixext.mkdir_rec !Xapi_globs.local_pool_repo_dir 0o700 
; + let sync_repo () = + let Pkg_mgr.{cmd; params} = Pkgs.sync_repo ~repo_name in + Unixext.mkdir_rec !Xapi_globs.local_pool_repo_dir 0o700 ; + clean_yum_cache repo_name ; + ignore (Helpers.call_script cmd params) + in + + with_sync_client_auth client_auth @@ fun client_auth -> + with_sync_server_auth server_auth @@ fun binary_url' -> + write_initial_yum_config + ~binary_url:(Option.value binary_url' ~default:binary_url) ; clean_yum_cache repo_name ; - ignore (Helpers.call_script cmd params) + (* Remove imported YUM repository GPG key *) + if Pkgs.manager = Yum then + Xapi_stdext_unix.Unixext.rm_rec (get_repo_config repo_name "gpgdir") ; + let auth_params = + match client_auth with + | Some (auth_file, plugin) -> + let token_param = + Printf.sprintf "--setopt=%s.%s=%s" repo_name plugin + (Uri.make ~scheme:"file" ~path:auth_file () |> Uri.to_string) + in + [token_param] + | None -> + [] + in + let proxy_params = + match use_proxy with + | true -> + let proxy_url_param, proxy_username_param, proxy_password_param = + get_proxy_params ~__context repo_name + in + [proxy_url_param; proxy_username_param; proxy_password_param] + | false -> + [] + in + config_repo (auth_params @ proxy_params) ; + make_cache () ; + sync_repo () ) (fun () -> (* Rewrite repo conf file as initial content to remove credential * related info, I.E. proxy username/password and temporary token file * path. + * One thing to note: for remote_repo, the binary_url used to + * re-initial yum repo is the url configed in the remote_pool repo, + * which is not the correct one for stunnel client proxy, while as we + * will always write_initial_yum_config every time before syncing repo, + * this should be ok. *) - write_initial_yum_config () + write_initial_yum_config ~binary_url ) ; (* The custom yum-utils will fully download repository metadata including * the repo gpg signature. 
@@ -239,10 +305,13 @@ let sync ~__context ~self ~token ~token_id = // "repomd.xml.asc" in Sys.file_exists repo_gpg_signature - with e -> - error "Failed to sync with remote YUM repository: %s" - (ExnHelper.string_of_exn e) ; - raise Api_errors.(Server_error (reposync_failed, [])) + with + | Api_errors.Server_error (_, _) as e -> + raise e + | e -> + error "Failed to sync with remote YUM repository: %s" + (ExnHelper.string_of_exn e) ; + raise Api_errors.(Server_error (reposync_failed, [])) let http_get_host_updates_in_json ~__context ~host ~installed = let host_session_id = diff --git a/ocaml/xapi/repository.mli b/ocaml/xapi/repository.mli index 5e1c78690fb..3049c003400 100644 --- a/ocaml/xapi/repository.mli +++ b/ocaml/xapi/repository.mli @@ -48,6 +48,8 @@ val sync : -> self:[`Repository] API.Ref.t -> token:string -> token_id:string + -> username:string + -> password:string -> bool val create_pool_repository : diff --git a/ocaml/xapi/repository_helpers.ml b/ocaml/xapi/repository_helpers.ml index 4016a158237..ea7495b32b1 100644 --- a/ocaml/xapi/repository_helpers.ml +++ b/ocaml/xapi/repository_helpers.ml @@ -231,18 +231,22 @@ let assert_gpgkey_path_is_valid path = raise Api_errors.(Server_error (invalid_gpgkey_path, [path])) ) -let assert_remote_pool_url_is_valid ~url = +let get_remote_pool_coordinator_ip url = let uri = Uri.of_string url in match (Uri.scheme uri, Uri.host uri, Uri.path uri) with | Some "https", Some host, path when path = Constants.get_enabled_repository_uri && Helpers.is_valid_ip `ipv4or6 host -> - () + host | _ -> error "Invalid url: %s, expected url format: %s" url ("https://" ^ Constants.get_enabled_repository_uri) ; raise Api_errors.(Server_error (invalid_base_url, [url])) +let assert_remote_pool_url_is_valid ~url = + get_remote_pool_coordinator_ip url + |> Xapi_stdext_pervasives.Pervasiveext.ignore_string + let with_pool_repositories f = Xapi_stdext_pervasives.Pervasiveext.finally (fun () -> @@ -1284,26 +1288,69 @@ let 
get_single_enabled_update_repository ~__context = in get_singleton enabled_update_repositories -let with_access_token ~token ~token_id f = - match (token, token_id) with - | t, tid when t <> "" && tid <> "" -> - info "sync updates with token_id: %s" tid ; - let json = `Assoc [("token", `String t); ("token_id", `String tid)] in - let tmpfile, tmpch = - Filename.open_temp_file ~mode:[Open_text] "accesstoken" ".json" +type client_auth = + | CdnTokenAuth (* remote *) of { + token_id: string + ; token: string + ; plugin: string + } + | NoAuth (* bundle *) + | PoolExtHostAuth (* remote_pool *) of {xapi_token: string; plugin: string} + +let with_sync_client_auth auth f = + let go_with_client_plugin cred plugin = + let ( let@ ) g x = g x in + let@ temp_file = + Helpers.with_temp_file_of_content ~mode:[Open_text] "token-" ".json" cred + in + f (Some (temp_file, plugin)) + in + match auth with + | CdnTokenAuth {token_id; token; _} when token_id = "" && token = "" -> + f None + | CdnTokenAuth {token_id; token; plugin} -> + let cred = + `Assoc [("token", `String token); ("token_id", `String token_id)] + |> Yojson.Basic.to_string in - Xapi_stdext_pervasives.Pervasiveext.finally - (fun () -> - output_string tmpch (Yojson.Basic.to_string json) ; - close_out tmpch ; - f (Some tmpfile) - ) - (fun () -> Unixext.unlink_safe tmpfile) - | t, tid when t = "" && tid = "" -> + go_with_client_plugin cred plugin + | PoolExtHostAuth {xapi_token; plugin} -> + let cred = + `Assoc [("xapitoken", `String xapi_token)] |> Yojson.Basic.to_string + in + go_with_client_plugin cred plugin + | NoAuth -> f None - | _ -> - let msg = Printf.sprintf "%s: The token or token_id is empty" __LOC__ in - raise Api_errors.(Server_error (internal_error, [msg])) + +type server_auth = + | DefaultAuth (* remote *) + | NoAuth (* bundle *) + | StunnelClientProxyAuth (* remote_pool *) of { + cert: string + ; remote_addr: string + ; remote_port: int + } + +let with_sync_server_auth auth f = + match auth with + | 
DefaultAuth | NoAuth -> + f None + | StunnelClientProxyAuth {cert; remote_addr; remote_port} -> + let local_host = "127.0.0.1" in + let local_port = !Xapi_globs.local_yum_repo_port in + let ( let@ ) f x = f x in + let@ temp_file = + Helpers.with_temp_file_of_content "external-host-cert-" ".pem" cert + in + let binary_url = + Uri.make ~scheme:"http" ~host:local_host ~port:local_port + ~path:Constants.get_enabled_repository_uri () + |> Uri.to_string + in + Stunnel.with_client_proxy + ~verify_cert:(Stunnel_client.external_host temp_file) + ~remote_host:remote_addr ~remote_port ~local_host ~local_port + @@ fun () -> f (Some binary_url) let prune_updateinfo_for_livepatches latest_lps updateinfo = let livepatches = diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 5eec626c601..9b7953306dc 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -3559,12 +3559,15 @@ let remove_repository ~__context ~self ~value = if Db.Pool.get_repositories ~__context ~self = [] then Db.Pool.set_last_update_sync ~__context ~self ~value:Date.epoch -let sync_repos ~__context ~self ~repos ~force ~token ~token_id = +let sync_repos ~__context ~self ~repos ~force ~token ~token_id ~username + ~password = let open Repository in repos |> List.iter (fun repo -> if force then cleanup_pool_repo ~__context ~self:repo ; - let complete = sync ~__context ~self:repo ~token ~token_id in + let complete = + sync ~__context ~self:repo ~token ~token_id ~username ~password + in (* Dnf and custom yum-utils sync all the metadata including updateinfo, * Thus no need to re-create pool repository *) if Pkgs.manager = Yum && complete = false then @@ -3574,14 +3577,14 @@ let sync_repos ~__context ~self ~repos ~force ~token ~token_id = Db.Pool.set_last_update_sync ~__context ~self ~value:(Date.now ()) ; checksum -let sync_updates ~__context ~self ~force ~token ~token_id = +let sync_updates ~__context ~self ~force ~token ~token_id ~username ~password = Pool_features.assert_enabled 
~__context ~f:Features.Updates ; Xapi_pool_helpers.with_pool_operation ~__context ~self ~doc:"pool.sync_updates" ~op:`sync_updates @@ fun () -> let repos = Repository_helpers.get_enabled_repositories ~__context in assert_can_sync_updates ~__context ~repos ; - sync_repos ~__context ~self ~repos ~force ~token ~token_id + sync_repos ~__context ~self ~repos ~force ~token ~token_id ~username ~password let check_update_readiness ~__context ~self:_ ~requires_reboot = (* Pool license check *) @@ -3956,7 +3959,7 @@ let put_bundle_handler (req : Request.t) s _ = (fun () -> try sync_repos ~__context ~self:pool ~repos:[repo] ~force:true - ~token:"" ~token_id:"" + ~token:"" ~token_id:"" ~username:"" ~password:"" |> ignore with _ -> raise Api_errors.(Server_error (bundle_sync_failed, [])) diff --git a/ocaml/xapi/xapi_pool.mli b/ocaml/xapi/xapi_pool.mli index 835a356f782..494a486032b 100644 --- a/ocaml/xapi/xapi_pool.mli +++ b/ocaml/xapi/xapi_pool.mli @@ -360,6 +360,8 @@ val sync_updates : -> force:bool -> token:string -> token_id:string + -> username:string + -> password:string -> string val check_update_readiness : diff --git a/python3/dnf_plugins/accesstoken.py b/python3/dnf_plugins/accesstoken.py index 2537d2a6721..97635fa160b 100644 --- a/python3/dnf_plugins/accesstoken.py +++ b/python3/dnf_plugins/accesstoken.py @@ -10,7 +10,7 @@ class InvalidToken(Exception): - """Token is invlaid""" + """Token is invalid""" def __init__(self, token): super().__init__(f"Invalid token: {token}") diff --git a/python3/dnf_plugins/ptoken.py b/python3/dnf_plugins/ptoken.py index c2ea73fccc8..35b6f9aef70 100644 --- a/python3/dnf_plugins/ptoken.py +++ b/python3/dnf_plugins/ptoken.py @@ -24,6 +24,8 @@ def config(self): for repo_name in self.base.repos: repo = self.base.repos[repo_name] + # Only include the ptoken for repos with a localhost URL, for added safety. + # These will be proxied to the coordinator through stunnel, set up by xapi. 
if len(repo.baseurl) > 0 and repo.baseurl[0].startswith("http://127.0.0.1") \ and repo.ptoken: secret = "pool_secret=" + ptoken diff --git a/python3/dnf_plugins/xapitoken.py b/python3/dnf_plugins/xapitoken.py new file mode 100644 index 00000000000..377fe33964e --- /dev/null +++ b/python3/dnf_plugins/xapitoken.py @@ -0,0 +1,49 @@ +"""dnf plugin to set xapitoken http header for enabled repos""" +import json +import logging +# Disable the error, it can be import in production env +# and mocked out in unitttest +# pylint: disable=import-error +# pytype: disable=import-error +import dnf +import urlgrabber + + +class InvalidToken(Exception): + """Token is invalid""" + def __init__(self, token): + super().__init__(f"Invalid token: {token}") + + +#pylint: disable=too-few-public-methods +class XapiToken(dnf.Plugin): + """dnf xapitoken plugin class""" + + name = "xapitoken" + + def config(self): + """ DNF plugin config hook, + refer to https://dnf.readthedocs.io/en/latest/api_plugins.html""" + + for repo_name in self.base.repos: + repo = self.base.repos[repo_name] + + token_url = repo.xapitoken + if not token_url or token_url == '': + continue + try: + token_str = urlgrabber.urlopen(token_url).read().strip() + token = json.loads(token_str) + except Exception: #pylint: disable=broad-except + logging.debug("Failed to load token from: %s", token_url) + continue + + if not token.get('xapitoken'): + raise InvalidToken(token) + + # Only include the xapitoken for repos with a localhost URL, for added safety. + # These will be proxied to the remote pool coordinator through stunnel, set up by xapi. 
+ if len(repo.baseurl) > 0 and repo.baseurl[0].startswith("http://127.0.0.1") \ + and repo.xapitoken: + secret = "session_id=" + str(token["xapitoken"]) + repo.set_http_headers([f'cookie:{secret}']) diff --git a/python3/tests/test_dnf_plugins.py b/python3/tests/test_dnf_plugins.py index 2f82b1eb5cb..895317f8778 100644 --- a/python3/tests/test_dnf_plugins.py +++ b/python3/tests/test_dnf_plugins.py @@ -1,4 +1,4 @@ -"""Test module for dnf accesstoken""" +"""Test module for dnf accesstoken, ptoken and xapitoken""" import unittest import sys import json @@ -17,14 +17,16 @@ accesstoken = import_file_as_module("python3/dnf_plugins/accesstoken.py") ptoken = import_file_as_module("python3/dnf_plugins/ptoken.py") +xapitoken = import_file_as_module("python3/dnf_plugins/xapitoken.py") REPO_NAME = "testrepo" -def _mock_repo(a_token=None, p_token=None, baseurl=None): +def _mock_repo(a_token=None, p_token=None, xapi_token=None, baseurl=None): mock_repo = MagicMock() mock_repo.accesstoken = a_token mock_repo.ptoken = p_token + mock_repo.xapitoken = xapi_token mock_repo.baseurl = baseurl mock_base = MagicMock() mock_base.repos = {REPO_NAME: mock_repo} @@ -103,3 +105,60 @@ def test_local_repo_does_not_enable_ptoken_should_ignore_ptoken(self, mock_open) mock_repo = _mock_repo(p_token=False, baseurl=["http://127.0.0.1/some_local_path"]) ptoken.Ptoken(mock_repo.base, MagicMock()).config() assert not mock_repo.set_http_headers.called + +@patch("xapitoken.urlgrabber") +class TestXapitoken(unittest.TestCase): + """Test class for xapitoken dnf plugin""" + + def test_set_http_header_with_xapi_token(self, mock_grabber): + """test config succeed with xapitokan""" + mock_repo = _mock_repo(xapi_token="file:///mock_xapitoken_url", + baseurl=["http://127.0.0.1/some_local_path"]) + mock_grabber.urlopen.return_value.read.return_value = json.dumps({ + "xapitoken": "valid_token", + }) + xapitoken.XapiToken(mock_repo.base, MagicMock()).config() + mock_repo.set_http_headers.assert_called_with( + 
['cookie:session_id=valid_token'] + ) + + def test_repo_without_xapi_token(self, mock_grabber): + """If repo has not xapitoken, it should not be blocked""" + mock_repo = _mock_repo() + xapitoken.XapiToken(mock_repo.base, MagicMock()).config() + assert not mock_repo.set_http_headers.called + + def test_ignore_invalid_token_url(self, mock_grabber): + """If repo provided an invalid token url, it should be ignored""" + mock_repo = _mock_repo(xapi_token="Not_existed") + xapitoken.XapiToken(mock_repo.base, MagicMock()).config() + assert not mock_repo.set_http_headers.called + + def test_invalid_token_raise_exception(self, mock_grabber): + """Token with right json format, bad content should raise""" + mock_repo = _mock_repo(xapi_token="file:///file_contain_invalid_token", + baseurl=["http://127.0.0.1/some_local_path"]) + mock_grabber.urlopen.return_value.read.return_value = json.dumps({ + "bad_token": "I am bad guy" + }) + with self.assertRaises(xapitoken.InvalidToken): + xapitoken.XapiToken(mock_repo.base, MagicMock()).config() + + def test_remote_repo_ignore_xapitoken(self, mock_grabber): + """non-local repo should just ignore the xapitoken""" + mock_repo = _mock_repo(xapi_token=True, + baseurl=["http://some_remote_token/some_local_path"]) + mock_grabber.urlopen.return_value.read.return_value = json.dumps({ + "xapitoken": "valid_token", + }) + xapitoken.XapiToken(mock_repo.base, MagicMock()).config() + assert not mock_repo.set_http_headers.called + + def test_local_repo_does_not_enable_xapitoken_should_ignore_xapitoken(self, mock_grabber): + """local repo which has not enabled xapitoken should just ignore the xapitoken""" + mock_repo = _mock_repo(xapi_token=False, baseurl=["http://127.0.0.1/some_local_path"]) + mock_grabber.urlopen.return_value.read.return_value = json.dumps({ + "xapitoken": "valid_token", + }) + xapitoken.XapiToken(mock_repo.base, MagicMock()).config() + assert not mock_repo.set_http_headers.called diff --git a/scripts/Makefile b/scripts/Makefile index 
503e7838546..6bb740e9df8 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -143,6 +143,8 @@ install: # YUM plugins $(IPROG) yum-plugins/accesstoken.py $(DESTDIR)$(YUMPLUGINDIR) $(IDATA) yum-plugins/accesstoken.conf $(DESTDIR)$(YUMPLUGINCONFDIR) + $(IPROG) yum-plugins/xapitoken.py $(DESTDIR)$(YUMPLUGINDIR) + $(IDATA) yum-plugins/xapitoken.conf $(DESTDIR)$(YUMPLUGINCONFDIR) $(IPROG) yum-plugins/ptoken.py $(DESTDIR)$(YUMPLUGINDIR) $(IDATA) yum-plugins/ptoken.conf $(DESTDIR)$(YUMPLUGINCONFDIR) # maillanguages diff --git a/scripts/yum-plugins/accesstoken.py b/scripts/yum-plugins/accesstoken.py index 0f549f27121..a83d7bf2ce9 100644 --- a/scripts/yum-plugins/accesstoken.py +++ b/scripts/yum-plugins/accesstoken.py @@ -11,16 +11,16 @@ # The content of the file referred by the looks like: # { 'token': '...', 'token_id': '...' } +import json from yum import config from yum.plugins import TYPE_CORE -import json import urlgrabber requires_api_version = '2.5' plugin_type = (TYPE_CORE,) -def config_hook(conduit): +def config_hook(conduit): # pylint: disable=unused-argument config.RepoConf.accesstoken = config.UrlOption() def init_hook(conduit): @@ -35,11 +35,11 @@ def init_hook(conduit): try: token_str = urlgrabber.urlopen(token_url).read().strip() token = json.loads(token_str) - except: + except Exception: #pylint: disable=broad-except continue if not (token['token'] and token['token_id']): - raise Exception("Invalid token or token_id") + raise Exception("Invalid token or token_id") #pylint: disable=broad-exception-raised repo.http_headers['X-Access-Token'] = str(token['token']) repo.http_headers['Referer'] = str(token['token_id']) diff --git a/scripts/yum-plugins/ptoken.py b/scripts/yum-plugins/ptoken.py index 74536e19ee8..0bc0cca0a6d 100755 --- a/scripts/yum-plugins/ptoken.py +++ b/scripts/yum-plugins/ptoken.py @@ -25,7 +25,7 @@ def init_hook(conduit): for name in repos.repos: repo = repos.repos[name] # Only include the ptoken for repos with a localhost URL, for added 
safety. - # These may be proxied to the coordinator through stunnel, set up by xapi. + # These will be proxied to the coordinator through stunnel, set up by xapi. if len(repo.baseurl) > 0 and repo.baseurl[0].startswith("http://127.0.0.1") \ and repo.getConfigOption('ptoken'): repo.http_headers['cookie'] = "pool_secret=" + ptoken diff --git a/scripts/yum-plugins/xapitoken.conf b/scripts/yum-plugins/xapitoken.conf new file mode 100644 index 00000000000..8e4d76c728b --- /dev/null +++ b/scripts/yum-plugins/xapitoken.conf @@ -0,0 +1,2 @@ +[main] +enabled=1 diff --git a/scripts/yum-plugins/xapitoken.py b/scripts/yum-plugins/xapitoken.py new file mode 100644 index 00000000000..6b959c74462 --- /dev/null +++ b/scripts/yum-plugins/xapitoken.py @@ -0,0 +1,48 @@ +#!/usr/bin/python + +# Drop this file into /usr/lib/yum-plugins/ +# Enable it by creating conf file /etc/yum/pluginconf.d/xapitoken.conf: +# [main] +# enabled=1 +# +# Configure it by: +# yum-config-manager --setopt=.xapitoken=file:// --save + +# The content of the file referred by the looks like: +# { 'xapitoken': '...' } + +import json +from yum import config +from yum.plugins import TYPE_CORE +import urlgrabber + + +requires_api_version = '2.5' +plugin_type = (TYPE_CORE,) + +def config_hook(conduit): # pylint: disable=unused-argument + config.RepoConf.xapitoken = config.UrlOption() + +def init_hook(conduit): + repos = conduit.getRepos() + for name in repos.repos: + repo = repos.repos[name] + token_url = repo.getConfigOption('xapitoken') + if not token_url or token_url == '': + continue + + token = {} + try: + token_str = urlgrabber.urlopen(token_url).read().strip() + token = json.loads(token_str) + except Exception: #pylint: disable=broad-except + continue + + if not token['xapitoken']: + raise Exception("Invalid xapitoken") #pylint: disable=broad-exception-raised + + # Only include the xapitoken for repos with a localhost URL, for added safety. 
+ # These will be proxied to the remote pool coordinator through stunnel, set up by xapi. + if len(repo.baseurl) > 0 and repo.baseurl[0].startswith("http://127.0.0.1") \ + and repo.getConfigOption('xapitoken'): + repo.http_headers['cookie'] = "session_id=" + str(token['xapitoken']) From c710e8fe13ce9ef55eb53e53cedc5440463aa98f Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Wed, 11 Dec 2024 01:07:52 +0800 Subject: [PATCH 012/117] CP-52245: Temp disable repo_gpgcheck when syncing from remote_pool repo Will re-enable repo_gpgcheck by reverting this commit after CP-51429 is done. Signed-off-by: Gang Ji --- ocaml/xapi/repository.ml | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/ocaml/xapi/repository.ml b/ocaml/xapi/repository.ml index 8e43d72ecba..8d62a27a84e 100644 --- a/ocaml/xapi/repository.ml +++ b/ocaml/xapi/repository.ml @@ -155,13 +155,19 @@ let sync ~__context ~self ~token ~token_id ~username ~password = remove_repo_conf_file repo_name ; let origin = Db.Repository.get_origin ~__context ~self in - let binary_url, source_url, use_proxy, client_auth, server_auth = + let ( binary_url + , source_url + , repo_gpgcheck + , use_proxy + , client_auth + , server_auth ) = match origin with | `remote -> let plugin = "accesstoken" in ( Db.Repository.get_binary_url ~__context ~self , Some (Db.Repository.get_source_url ~__context ~self) , true + , true , CdnTokenAuth {token_id; token; plugin} , DefaultAuth ) @@ -169,7 +175,7 @@ let sync ~__context ~self ~token ~token_id ~username ~password = let uri = Uri.make ~scheme:"file" ~path:!Xapi_globs.bundle_repository_dir () in - (Uri.to_string uri, None, false, NoAuth, NoAuth) + (Uri.to_string uri, None, true, false, NoAuth, NoAuth) | `remote_pool -> let cert = Db.Repository.get_certificate ~__context ~self in let repo_binary_url = Db.Repository.get_binary_url ~__context ~self in @@ -209,6 +215,7 @@ let sync ~__context ~self ~token ~token_id ~username ~password = let plugin = "xapitoken" in ( 
repo_binary_url , None + , false , true , PoolExtHostAuth {xapi_token; plugin} , StunnelClientProxyAuth @@ -223,7 +230,7 @@ let sync ~__context ~self ~token ~token_id ~username ~password = s in let write_initial_yum_config ~binary_url = - write_yum_config ~source_url ~binary_url ~repo_gpgcheck:true ~gpgkey_path + write_yum_config ~source_url ~binary_url ~repo_gpgcheck ~gpgkey_path ~repo_name in Xapi_stdext_pervasives.Pervasiveext.finally From b3e1ec1a29da6501df2705c3084fb185e385520c Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Thu, 26 Dec 2024 10:27:43 +0800 Subject: [PATCH 013/117] Revert "CP-52245: Temp disable repo_gpgcheck when syncing from remote_pool repo" This reverts commit c710e8fe13ce9ef55eb53e53cedc5440463aa98f. Signed-off-by: Gang Ji --- ocaml/xapi/repository.ml | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/ocaml/xapi/repository.ml b/ocaml/xapi/repository.ml index 8d62a27a84e..8e43d72ecba 100644 --- a/ocaml/xapi/repository.ml +++ b/ocaml/xapi/repository.ml @@ -155,19 +155,13 @@ let sync ~__context ~self ~token ~token_id ~username ~password = remove_repo_conf_file repo_name ; let origin = Db.Repository.get_origin ~__context ~self in - let ( binary_url - , source_url - , repo_gpgcheck - , use_proxy - , client_auth - , server_auth ) = + let binary_url, source_url, use_proxy, client_auth, server_auth = match origin with | `remote -> let plugin = "accesstoken" in ( Db.Repository.get_binary_url ~__context ~self , Some (Db.Repository.get_source_url ~__context ~self) , true - , true , CdnTokenAuth {token_id; token; plugin} , DefaultAuth ) @@ -175,7 +169,7 @@ let sync ~__context ~self ~token ~token_id ~username ~password = let uri = Uri.make ~scheme:"file" ~path:!Xapi_globs.bundle_repository_dir () in - (Uri.to_string uri, None, true, false, NoAuth, NoAuth) + (Uri.to_string uri, None, false, NoAuth, NoAuth) | `remote_pool -> let cert = Db.Repository.get_certificate ~__context ~self in let repo_binary_url = 
Db.Repository.get_binary_url ~__context ~self in @@ -215,7 +209,6 @@ let sync ~__context ~self ~token ~token_id ~username ~password = let plugin = "xapitoken" in ( repo_binary_url , None - , false , true , PoolExtHostAuth {xapi_token; plugin} , StunnelClientProxyAuth @@ -230,7 +223,7 @@ let sync ~__context ~self ~token ~token_id ~username ~password = s in let write_initial_yum_config ~binary_url = - write_yum_config ~source_url ~binary_url ~repo_gpgcheck ~gpgkey_path + write_yum_config ~source_url ~binary_url ~repo_gpgcheck:true ~gpgkey_path ~repo_name in Xapi_stdext_pervasives.Pervasiveext.finally From eed437721a1e1f2fed93caeec6cba29dac86a18c Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 9 Jan 2025 10:00:27 +0000 Subject: [PATCH 014/117] python3: Add previously unused API classes to Python stubs used during testing Otherwise the CI will complain during pre-commit checks as it will fail to find methods and attributes on 'None' objects. Signed-off-by: Andrii Sultanov --- python3/stubs/XenAPI.pyi | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python3/stubs/XenAPI.pyi b/python3/stubs/XenAPI.pyi index ede1e13d5f5..daaf82d3c5a 100644 --- a/python3/stubs/XenAPI.pyi +++ b/python3/stubs/XenAPI.pyi @@ -51,8 +51,14 @@ class _Dispatcher: PBD: Incomplete pool: Incomplete host: Incomplete + host_metrics: Incomplete + host_cpu: Incomplete pool_update: Incomplete VM: Incomplete + VIF: Incomplete + PIF: Incomplete + VBD: Incomplete + network: Incomplete class Session(xmlrpclib.ServerProxy): From d976582450459efcf6ce8974541dd645b6dc0ade Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Fri, 17 Jan 2025 09:53:21 +0000 Subject: [PATCH 015/117] CA-404660: Refine repository enabling error message When enabling pool's repositories, if enabling bundle repo and remoe_pool repositories at the same time, it returns error message: `If the bundle repository or remote_pool repository is enabled, it should be the only one enabled repository of the pool. 
repo_types: bundle` The `repo_types` is confusing and tedious as only these 2 types of repository can meet this error. So remove the parameter `repo_types`. Signed-off-by: Bengang Yuan --- ocaml/idl/datamodel_errors.ml | 4 +-- ocaml/tests/test_pool_repository.ml | 49 +++++------------------------ ocaml/xapi/xapi_pool.ml | 8 +---- 3 files changed, 10 insertions(+), 51 deletions(-) diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml index 28fb1cc444d..0e1d4faa760 100644 --- a/ocaml/idl/datamodel_errors.ml +++ b/ocaml/idl/datamodel_errors.ml @@ -1933,10 +1933,10 @@ let _ = "The currently enabled repositories do not support periodic automatic \ updates." () ; - error Api_errors.repo_should_be_single_one_enabled ["repo_types"] + error Api_errors.repo_should_be_single_one_enabled [] ~doc: "If the bundle repository or remote_pool repository is enabled, it \ - should be the only one enabled repository of the pool." + should be the only enabled repository of the pool." () ; error Api_errors.update_syncing_remote_pool_coordinator_connection_failed [] ~doc: diff --git a/ocaml/tests/test_pool_repository.ml b/ocaml/tests/test_pool_repository.ml index 4d0bdb45ee6..a24297f34de 100644 --- a/ocaml/tests/test_pool_repository.ml +++ b/ocaml/tests/test_pool_repository.ml @@ -41,34 +41,19 @@ let test_set_repositories () = Xapi_pool.set_repositories ~__context ~self ~value:[ref_bundle] ; Xapi_pool.set_repositories ~__context ~self ~value:[ref_remote_pool] ; Alcotest.check_raises "test_set_repositories_1" - Api_errors.( - Server_error - ( repo_should_be_single_one_enabled - , [Record_util.origin_to_string `bundle] - ) - ) + Api_errors.(Server_error (repo_should_be_single_one_enabled, [])) (fun () -> Xapi_pool.set_repositories ~__context ~self ~value:[ref_remote; ref_bundle] ) ; Alcotest.check_raises "test_set_repositories_2" - Api_errors.( - Server_error - ( repo_should_be_single_one_enabled - , [`bundle; `remote_pool] |> List.map Record_util.origin_to_string - ) - 
) + Api_errors.(Server_error (repo_should_be_single_one_enabled, [])) (fun () -> Xapi_pool.set_repositories ~__context ~self ~value:[ref_remote_pool; ref_bundle] ) ; Alcotest.check_raises "test_set_repositories_3" - Api_errors.( - Server_error - ( repo_should_be_single_one_enabled - , [Record_util.origin_to_string `remote_pool] - ) - ) + Api_errors.(Server_error (repo_should_be_single_one_enabled, [])) (fun () -> Xapi_pool.set_repositories ~__context ~self ~value:[ref_remote; ref_remote_pool] @@ -78,45 +63,25 @@ let test_set_repositories () = let test_add_repository () = on_repositories (fun __context self ref_remote ref_bundle ref_remote_pool -> Alcotest.check_raises "test_add_repository_1" - Api_errors.( - Server_error - ( repo_should_be_single_one_enabled - , [Record_util.origin_to_string `bundle] - ) - ) + Api_errors.(Server_error (repo_should_be_single_one_enabled, [])) (fun () -> Xapi_pool.set_repositories ~__context ~self ~value:[ref_remote] ; Xapi_pool.add_repository ~__context ~self ~value:ref_bundle ) ; Alcotest.check_raises "test_add_repository_2" - Api_errors.( - Server_error - ( repo_should_be_single_one_enabled - , [Record_util.origin_to_string `remote_pool] - ) - ) + Api_errors.(Server_error (repo_should_be_single_one_enabled, [])) (fun () -> Xapi_pool.set_repositories ~__context ~self ~value:[ref_remote] ; Xapi_pool.add_repository ~__context ~self ~value:ref_remote_pool ) ; Alcotest.check_raises "test_add_repository_3" - Api_errors.( - Server_error - ( repo_should_be_single_one_enabled - , [`remote_pool; `bundle] |> List.map Record_util.origin_to_string - ) - ) + Api_errors.(Server_error (repo_should_be_single_one_enabled, [])) (fun () -> Xapi_pool.set_repositories ~__context ~self ~value:[ref_remote_pool] ; Xapi_pool.add_repository ~__context ~self ~value:ref_bundle ) ; Alcotest.check_raises "test_add_repository_4" - Api_errors.( - Server_error - ( repo_should_be_single_one_enabled - , [`bundle; `remote_pool] |> List.map 
Record_util.origin_to_string - ) - ) + Api_errors.(Server_error (repo_should_be_single_one_enabled, [])) (fun () -> Xapi_pool.set_repositories ~__context ~self ~value:[ref_bundle] ; Xapi_pool.add_repository ~__context ~self ~value:ref_remote_pool diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 9b7953306dc..dfdcfc3a3ed 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -3466,13 +3466,7 @@ let assert_single_repo_can_be_enabled ~__context ~repos = in match (repos, origins) with | _ :: _ :: _, _ :: _ -> - raise - Api_errors.( - Server_error - ( repo_should_be_single_one_enabled - , origins |> List.map Record_util.origin_to_string - ) - ) + raise Api_errors.(Server_error (repo_should_be_single_one_enabled, [])) | _, _ -> () From 9ef7e19f4d38a7785557d50fe524ed5e1ae30325 Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Wed, 5 Feb 2025 12:00:00 +0100 Subject: [PATCH 016/117] doc: walkthroughs/VM.start: Update the xenguest chapter (domain build) Signed-off-by: Bernhard Kaindl --- doc/content/xenopsd/walkthroughs/VM.start.md | 125 +++++++++++++++---- 1 file changed, 99 insertions(+), 26 deletions(-) diff --git a/doc/content/xenopsd/walkthroughs/VM.start.md b/doc/content/xenopsd/walkthroughs/VM.start.md index 7e24b6d66ba..52201fd7218 100644 --- a/doc/content/xenopsd/walkthroughs/VM.start.md +++ b/doc/content/xenopsd/walkthroughs/VM.start.md @@ -225,8 +225,8 @@ module and looks for scripts in the hardcoded path `/etc/xapi.d`. ## 2. create a Xen domain The `VM_create` micro-op calls the `VM.create` function in the backend. -In the classic Xenopsd backend the -[VM.create_exn](https://github.com/xapi-project/xenopsd/blob/b33bab13080cea91e2fd59d5088622cd68152339/xc/xenops_server_xen.ml#L633) +In the classic Xenopsd backend, the +[VM.create_exn](https://github.com/xapi-project/xen-api/blob/bae7526faeb2a02a2fe5b71410083983f4695963/ocaml/xenopsd/xc/xenops_server_xen.ml#L1421-L1586) function must 1. 
check if we're creating a domain for a fresh VM or resuming an existing one: @@ -237,7 +237,13 @@ function must because domain create often fails in low-memory conditions. This means the "reservation" is associated with our "session" with squeezed; if Xenopsd crashes and restarts the reservation will be freed automatically. -3. create the Domain via the libxc hypercall +3. create the Domain via the libxc hypercall `Xenctrl.domain_create` +4. [call]( + https://github.com/xapi-project/xen-api/blob/bae7526faeb2a02a2fe5b71410083983f4695963/ocaml/xenopsd/xc/xenops_server_xen.ml#L1547) + [generate_create_info()]( + https://github.com/xapi-project/xen-api/blob/bae7526faeb2a02a2fe5b71410083983f4695963/ocaml/xenopsd/xc/xenops_server_xen.ml#L1302-L1419) + for storing the platform data (vCPUs, etc) the domain's Xenstore tree. + `xenguest` then uses this in the `build` phase (see below) to build the domain. 4. "transfer" the squeezed reservation to the domain such that squeezed will free the memory if the domain is destroyed later 5. compute and set an initial balloon target depending on the amount of memory @@ -253,10 +259,16 @@ function must ## 3. build the domain -On a Xen system a domain is created empty, and memory is actually allocated -from the host in the "build" phase via functions in *libxenguest*. The -[VM.build_domain_exn](https://github.com/xapi-project/xenopsd/blob/b33bab13080cea91e2fd59d5088622cd68152339/xc/xenops_server_xen.ml#L994) -function must +On Xen, `Xenctrl.domain_create` creates an empty domain and +returns the domain ID (`domid`) of the new domain to `xenopsd`. + +In the `build` phase, the `xenguest` program is called to create +the system memory layout of the domain, set vCPU affinity and a +lot more. + +The function +[VM.build_domain_exn](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L2024) +must 1. run pygrub (or eliloader) to extract the kernel and initrd, if necessary 2. 
invoke the *xenguest* binary to interact with libxenguest. @@ -266,25 +278,86 @@ function must you would use after a reboot because some properties (such as maximum memory and vCPUs) as fixed on create. -The xenguest binary was originally -a separate binary for two reasons: (i) the libxenguest functions weren't -threadsafe since they used lots of global variables; and (ii) the libxenguest -functions used to have a different, incompatible license, which prevent us -linking. Both these problems have been resolved but we still shell out to -the xenguest binary. - -The xenguest binary has also evolved to configure more of the initial domain -state. It also [reads Xenstore](https://github.com/xapi-project/ocaml-xen-lowlevel-libs/blob/master/xenguest-4.4/xenguest_stubs.c#L42) -and configures - -- the vCPU affinity -- the vCPU credit2 weight/cap parameters -- whether the NX bit is exposed -- whether the viridian CPUID leaf is exposed -- whether the system has PAE or not -- whether the system has ACPI or not -- whether the system has nested HVM or not -- whether the system has an HPET or not +### 3.1 Interface to xenguest for building domains + +[xenguest](https://github.com/xenserver/xen.pg/blob/XS-8/patches/xenguest.patch) +was originally created as a separate program due to issues with +`libxenguest` that were fixed, but we still shell out to `xenguest`: + +- Wasn't threadsafe: fixed, but it still uses a per-call global struct +- Incompatible licence, but now licensed under the LGPL. + +The `xenguest` binary has evolved to build more of the initial +domain state. 
`xenopsd` passes it: + +- The domain type to build for (HVM, PHV or PV), +- The `domid` of the created empty domain, +- The amount of system memory of the domain, +- The platform data (vCPUs, vCPU affinity, etc) using the Xenstore: + - the vCPU affinity + - the vCPU credit2 weight/cap parameters + - whether the NX bit is exposed + - whether the viridian CPUID leaf is exposed + - whether the system has PAE or not + - whether the system has ACPI or not + - whether the system has nested HVM or not + - whether the system has an HPET or not + +When called to build a domain, `xenguest` reads those and builds the VM accordingly. + +### 3.2 Workflow for allocating and populating domain memory + +Based on the given type, the `xenguest` program calls dedicated +functions for the build process of given domain type. + +- For HVM, this function is `stub_xc_hvm_build()`. + +These domain build functions call these functions: + +1. `get_flags()` to get the platform data from the Xenstore +2. `configure_vcpus()` which uses the platform data from the Xenstore to configure vCPU affinity and the credit scheduler parameters vCPU weight and vCPU cap (max % pCPU time for throttling) +3. For HVM, `hvm_build_setup_mem` to: + 1. Decide the `e820` memory layout of the system memory of the domain + including memory holes depending on PCI passthrough and vGPU flags. + 2. Load the BIOS/UEFI firmware images + 3. Store the final MMIO hole parameters in the Xenstore + 4. Call the `libxenguest` function + [xc_dom_boot_mem_init()](https://github.com/xen-project/xen/blob/39c45caef271bc2b2e299217450cfda24c0c772a/tools/libs/guest/xg_dom_boot.c#L110-L126) + to allocate and map the domain's system memory. + For HVM domains, it calls + [meminit_hvm()](https://github.com/xen-project/xen/blob/39c45caef271bc2b2e299217450cfda24c0c772a/tools/libs/guest/xg_dom_x86.c#L1348-L1648) + to loop over the `vmemranges` of the domain for mapping the system RAM + of the guest from the Xen hypervisor heap. 
Its goals are: + + - Attempt to allocate 1GB superpages when possible + - Fall back to 2MB pages when 1GB allocation failed + - Fall back to 4k pages when both failed + + It uses the hypercall + [XENMEM_populate_physmap()]( + https://github.com/xen-project/xen/blob/39c45caef271bc2b2e299217450cfda24c0c772a/xen/common/memory.c#L1408-L1477) + to perform memory allocation and to map the allocated memory + to the system RAM ranges of the domain. + The hypercall must: + + 1. convert the arguments for allocating a page to hypervisor structures + 2. set flags and calls functions according to the arguments + 3. allocate the requested page at the most suitable place + + - depending on passed flags, allocate on a specific NUMA node + - else, if the domain has node affinity, on the affine nodes + - also in the most suitable memory zone within the NUMA node + + 4. fall back to less desirable places if this fails + + - or fail for "exact" allocation requests + + 5. split superpages if pages of the requested size are not available + + 5. Call `construct_cpuid_policy()` to apply the `CPUID` `featureset` policy + + For more details on the VM build step involving xenguest and Xen side see: + https://wiki.xenproject.org/wiki/Walkthrough:_VM_build_using_xenguest ## 4. mark each VBD as "active" From 5cc68f089e5b76bab7e69a1dec766e578fc52426 Mon Sep 17 00:00:00 2001 From: Konstantina Chremmou Date: Fri, 31 Jan 2025 13:48:16 +0000 Subject: [PATCH 017/117] Fixed outdated copyright dates. 
Signed-off-by: Konstantina Chremmou --- doc/content/xen-api/topics/vm-lifecycle.md | 2 +- ocaml/idl/datamodel_pool.ml | 4 ++-- ocaml/sdk-gen/c/README.dist | 2 +- ocaml/sdk-gen/csharp/autogen/README.md | 2 +- ocaml/sdk-gen/csharp/autogen/src/README-NuGet.md | 2 +- ocaml/sdk-gen/csharp/autogen/src/XenServer.csproj | 2 +- ocaml/sdk-gen/go/README.md | 2 +- .../java/autogen/xen-api/src/main/resources/README.txt | 2 +- ocaml/sdk-gen/powershell/autogen/README.md | 2 +- ocaml/sdk-gen/powershell/autogen/README_51.md | 2 +- ocaml/sdk-gen/powershell/autogen/XenServerPSModule.psd1 | 2 +- 11 files changed, 12 insertions(+), 12 deletions(-) diff --git a/doc/content/xen-api/topics/vm-lifecycle.md b/doc/content/xen-api/topics/vm-lifecycle.md index 7390dc61e80..44727bdf3f0 100644 --- a/doc/content/xen-api/topics/vm-lifecycle.md +++ b/doc/content/xen-api/topics/vm-lifecycle.md @@ -2,7 +2,7 @@ title = "VM Lifecycle" +++ -The following figure shows the states that a VM can be in and the +The following figure shows the states that a VM can be in and the API calls that can be used to move the VM between these states. ```mermaid diff --git a/ocaml/idl/datamodel_pool.ml b/ocaml/idl/datamodel_pool.ml index ab0d1669788..6e6772b7add 100644 --- a/ocaml/idl/datamodel_pool.ml +++ b/ocaml/idl/datamodel_pool.ml @@ -1441,8 +1441,8 @@ let update_sync_frequency = let configure_update_sync = call ~name:"configure_update_sync" ~doc: - "Configure periodic update synchronization to sync updates from a remote \ - CDN" + "Configure periodic update synchronization to synchronize updates from a \ + remote CDN" ~lifecycle:[] ~params: [ diff --git a/ocaml/sdk-gen/c/README.dist b/ocaml/sdk-gen/c/README.dist index e5fb8622069..20095880330 100644 --- a/ocaml/sdk-gen/c/README.dist +++ b/ocaml/sdk-gen/c/README.dist @@ -1,7 +1,7 @@ libxenserver ============ -Copyright (c) 2007-2023 Cloud Software Group, Inc. All Rights Reserved. +Copyright (c) 2007-2025 Cloud Software Group, Inc. All Rights Reserved. 
libxenserver is a complete SDK for XenServer exposing the XenServer API to C developers. diff --git a/ocaml/sdk-gen/csharp/autogen/README.md b/ocaml/sdk-gen/csharp/autogen/README.md index bd75fb5ffd2..acc7622ee50 100644 --- a/ocaml/sdk-gen/csharp/autogen/README.md +++ b/ocaml/sdk-gen/csharp/autogen/README.md @@ -1,6 +1,6 @@ # XenServer.NET -Copyright (c) 2007-2024 Cloud Software Group, Inc. All Rights Reserved. +Copyright (c) 2007-2025 Cloud Software Group, Inc. All Rights Reserved. XenServer.NET is a complete SDK for XenServer, exposing the XenServer API as .NET classes. It is written in C#. diff --git a/ocaml/sdk-gen/csharp/autogen/src/README-NuGet.md b/ocaml/sdk-gen/csharp/autogen/src/README-NuGet.md index 47ccad8037b..0e4a60fffc0 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/README-NuGet.md +++ b/ocaml/sdk-gen/csharp/autogen/src/README-NuGet.md @@ -1,6 +1,6 @@ # XenServer.NET -Copyright (c) 2007-2024 Cloud Software Group, Inc. All Rights Reserved. +Copyright (c) 2007-2025 Cloud Software Group, Inc. All Rights Reserved. XenServer.NET is a complete SDK for XenServer, exposing the XenServer API as .NET classes. It is written in C#. diff --git a/ocaml/sdk-gen/csharp/autogen/src/XenServer.csproj b/ocaml/sdk-gen/csharp/autogen/src/XenServer.csproj index bf387509141..8f36aba76fa 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/XenServer.csproj +++ b/ocaml/sdk-gen/csharp/autogen/src/XenServer.csproj @@ -11,7 +11,7 @@ $(AssemblyName).NET $(AssemblyName).NET .NET wrapper for the XenServer API - Copyright (c) 2007-2024 Cloud Software Group, Inc. All Rights Reserved. + Copyright (c) 2007-2025 Cloud Software Group, Inc. All Rights Reserved. 
citrix hypervisor virtualization sdk jsonrpc .net c# xen xenserver BSD-2-Clause https://github.com/xapi-project/xen-api diff --git a/ocaml/sdk-gen/go/README.md b/ocaml/sdk-gen/go/README.md index 2cf13fee41b..33e7eef8a38 100644 --- a/ocaml/sdk-gen/go/README.md +++ b/ocaml/sdk-gen/go/README.md @@ -1,6 +1,6 @@ # XenServer SDK for Go -Copyright (c) 2023-2024 Cloud Software Group, Inc. All Rights Reserved. +Copyright (c) 2023-2025 Cloud Software Group, Inc. All Rights Reserved. XenServer SDK for Go is a complete SDK for XenServer, exposing the XenServer API as Go module. It is written in Go. diff --git a/ocaml/sdk-gen/java/autogen/xen-api/src/main/resources/README.txt b/ocaml/sdk-gen/java/autogen/xen-api/src/main/resources/README.txt index 632b8af5728..f5d13f889f5 100644 --- a/ocaml/sdk-gen/java/autogen/xen-api/src/main/resources/README.txt +++ b/ocaml/sdk-gen/java/autogen/xen-api/src/main/resources/README.txt @@ -1,7 +1,7 @@ XenServerJava ============= -Copyright (c) 2007-2023 Cloud Software Group, Inc. All Rights Reserved. +Copyright (c) 2007-2025 Cloud Software Group, Inc. All Rights Reserved. XenServerJava is a complete SDK for XenServer, exposing the XenServer API as Java classes. diff --git a/ocaml/sdk-gen/powershell/autogen/README.md b/ocaml/sdk-gen/powershell/autogen/README.md index abbb3b0b1e7..40812c3f7c8 100644 --- a/ocaml/sdk-gen/powershell/autogen/README.md +++ b/ocaml/sdk-gen/powershell/autogen/README.md @@ -1,6 +1,6 @@ # XenServer PowerShell Module -Copyright (c) 2013-2024 Cloud Software Group, Inc. All Rights Reserved. +Copyright (c) 2013-2025 Cloud Software Group, Inc. All Rights Reserved. The XenServer PowerShell Module is a complete SDK for XenServer, exposing the XenServer API as Windows PowerShell cmdlets. 
diff --git a/ocaml/sdk-gen/powershell/autogen/README_51.md b/ocaml/sdk-gen/powershell/autogen/README_51.md index 4d5b19e26be..24ac038ee3c 100644 --- a/ocaml/sdk-gen/powershell/autogen/README_51.md +++ b/ocaml/sdk-gen/powershell/autogen/README_51.md @@ -1,6 +1,6 @@ # XenServer PowerShell Module -Copyright (c) 2013-2024 Cloud Software Group, Inc. All Rights Reserved. +Copyright (c) 2013-2025 Cloud Software Group, Inc. All Rights Reserved. The XenServer PowerShell Module is a complete SDK for XenServer, exposing the XenServer API as Windows PowerShell cmdlets. diff --git a/ocaml/sdk-gen/powershell/autogen/XenServerPSModule.psd1 b/ocaml/sdk-gen/powershell/autogen/XenServerPSModule.psd1 index 87edaa0214f..968dfddce70 100644 --- a/ocaml/sdk-gen/powershell/autogen/XenServerPSModule.psd1 +++ b/ocaml/sdk-gen/powershell/autogen/XenServerPSModule.psd1 @@ -37,7 +37,7 @@ GUID = 'D695A8B9-039A-443C-99A4-0D48D7C6AD76' #Copyright Author = '' CompanyName = 'Cloud Software Group, Inc' -Copyright = 'Copyright (c) 2013-2024 Cloud Software Group, Inc. All rights reserved.' +Copyright = 'Copyright (c) 2013-2025 Cloud Software Group, Inc. All rights reserved.' 
# Requirements PowerShellVersion = '@PS_VERSION@' From ece03ca03cd54064954bf2b8ee5e53be5c73cd6c Mon Sep 17 00:00:00 2001 From: Yann Dirson Date: Wed, 29 Jan 2025 14:42:39 +0100 Subject: [PATCH 018/117] debug traces for is_component_enabled Signed-off-by: Yann Dirson --- ocaml/xapi/xapi_observer_components.ml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/ocaml/xapi/xapi_observer_components.ml b/ocaml/xapi/xapi_observer_components.ml index 0b3b884f465..e7803189151 100644 --- a/ocaml/xapi/xapi_observer_components.ml +++ b/ocaml/xapi/xapi_observer_components.ml @@ -90,9 +90,17 @@ let is_component_enabled ~component = |> List.mem component ) observers - with _ -> false + with e -> + D.log_backtrace () ; + D.warn "is_component_enabled(%s) inner got exception: %s" + (to_string component) (Printexc.to_string e) ; + false ) - with _ -> false + with e -> + D.log_backtrace () ; + D.warn "is_component_enabled(%s) got exception: %s" (to_string component) + (Printexc.to_string e) ; + false let is_smapi_enabled () = is_component_enabled ~component:SMApi From b58bf8284a24e87f3e9f1eb641e41f0e4fd65bfb Mon Sep 17 00:00:00 2001 From: Aidan Allen Date: Fri, 24 Jan 2025 11:07:57 +0000 Subject: [PATCH 019/117] CP-53470 Additional spans in & around the pause section in VM.migrate Signed-off-by: Aidan Allen --- ocaml/xenopsd/lib/xenops_server.ml | 22 ------------------ ocaml/xenopsd/lib/xenops_task.ml | 24 ++++++++++++++++++++ ocaml/xenopsd/xc/device.ml | 8 +++++-- ocaml/xenopsd/xc/domain.ml | 32 ++++++++++++++------------- ocaml/xenopsd/xc/dune | 2 ++ ocaml/xenopsd/xc/xenops_server_xen.ml | 28 +++++++++++++++++++---- 6 files changed, 73 insertions(+), 43 deletions(-) diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index e3f0a77f5e8..6744bb8488c 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -1835,28 +1835,6 @@ let rec atomics_of_operation = function | _ -> [] -let 
with_tracing ~name ~task f = - let open Tracing in - let parent = Xenops_task.tracing task in - let tracer = Tracer.get_tracer ~name in - match Tracer.start ~tracer ~name ~parent () with - | Ok span -> ( - Xenops_task.set_tracing task span ; - try - let result = f () in - ignore @@ Tracer.finish span ; - Xenops_task.set_tracing task parent ; - result - with exn -> - let backtrace = Printexc.get_raw_backtrace () in - let error = (exn, backtrace) in - ignore @@ Tracer.finish span ~error ; - raise exn - ) - | Error e -> - warn "Failed to start tracing: %s" (Printexc.to_string e) ; - f () - let rec perform_atomic ~progress_callback ?result (op : atomic) (t : Xenops_task.task_handle) : unit = let module B = (val get_backend () : S) in diff --git a/ocaml/xenopsd/lib/xenops_task.ml b/ocaml/xenopsd/lib/xenops_task.ml index 23d88beef18..f2c3993cf39 100644 --- a/ocaml/xenopsd/lib/xenops_task.ml +++ b/ocaml/xenopsd/lib/xenops_task.ml @@ -1,5 +1,7 @@ open Xenops_utils +module D = Debug.Make (struct let name = __MODULE__ end) + module XI = struct include Xenops_interface @@ -89,3 +91,25 @@ let traceparent_header_of_task t = ) (Xenops_task.tracing t) |> Option.to_list + +let with_tracing ~name ~task f = + let open Tracing in + let parent = Xenops_task.tracing task in + let tracer = Tracer.get_tracer ~name in + match Tracer.start ~tracer ~name ~parent () with + | Ok span -> ( + Xenops_task.set_tracing task span ; + try + let result = f () in + let _ : (Span.t option, exn) result = Tracer.finish span in + Xenops_task.set_tracing task parent ; + result + with exn -> + let backtrace = Printexc.get_raw_backtrace () in + let error = (exn, backtrace) in + let _ : (Span.t option, exn) result = Tracer.finish span ~error in + raise exn + ) + | Error e -> + D.warn "Failed to start tracing: %s" (Printexc.to_string e) ; + f () diff --git a/ocaml/xenopsd/xc/device.ml b/ocaml/xenopsd/xc/device.ml index 235f6457875..22514697509 100644 --- a/ocaml/xenopsd/xc/device.ml +++ 
b/ocaml/xenopsd/xc/device.ml @@ -2501,6 +2501,7 @@ module Backend = struct let assert_can_suspend ~xs:_ _ = () let suspend (task : Xenops_task.task_handle) ~xs ~qemu_domid domid = + with_tracing ~task ~name:"Qemu_none.Dm.suspend" @@ fun () -> Dm_Common.signal task ~xs ~qemu_domid ~domid "save" ~wait_for:"paused" let stop ~xs:_ ~qemu_domid:_ ~vtpm:_ _ = () @@ -3180,7 +3181,8 @@ module Backend = struct (* key not present *) - let suspend (_ : Xenops_task.task_handle) ~xs:_ ~qemu_domid:_ domid = + let suspend (task : Xenops_task.task_handle) ~xs:_ ~qemu_domid:_ domid = + with_tracing ~task ~name:"Qemu_upstream_compat.Dm.suspend" @@ fun () -> let as_msg cmd = Qmp.(Success (Some __LOC__, cmd)) in let perms = [Unix.O_WRONLY; Unix.O_CREAT] in let save_file = sprintf qemu_save_path domid in @@ -3860,7 +3862,8 @@ module Dm = struct debug "Called Dm.restore_vgpu" ; start_vgpu ~xc ~xs task ~restore:true domid vgpus vcpus profile - let suspend_varstored (_ : Xenops_task.task_handle) ~xs domid ~vm_uuid = + let suspend_varstored (task : Xenops_task.task_handle) ~xs domid ~vm_uuid = + with_tracing ~task ~name:"Dm.suspend_varstored" @@ fun () -> debug "Called Dm.suspend_varstored (domid=%d)" domid ; Service.Varstored.stop ~xs domid ; Xenops_sandbox.Varstore_guard.read ~domid efivars_save_path ~vm_uuid @@ -3877,6 +3880,7 @@ module Dm = struct debug "Wrote EFI variables to %s (domid=%d)" path domid let suspend_vtpm (task : Xenops_task.task_handle) ~xs domid ~vtpm = + with_tracing ~task ~name:"Dm.suspend_vtpm" @@ fun () -> debug "Called Dm.suspend_vtpm (domid=%d)" domid ; let dbg = Xenops_task.get_dbg task in Option.map diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index fce32abf19b..b7756f7f704 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -1702,22 +1702,23 @@ let suspend_emu_manager ~(task : Xenops_task.task_handle) ~xc:_ ~xs ~domain_type (string_of_message message) ; match message with | Suspend -> - do_suspend_callback () ; - 
if domain_type = `hvm then ( - let vm_uuid = Uuidx.to_string uuid in - debug "VM = %s; domid = %d; suspending qemu-dm" vm_uuid domid ; - Device.Dm.suspend task ~xs ~qemu_domid ~dm domid ; - if is_uefi then - let (_ : string) = - Device.Dm.suspend_varstored task ~xs domid ~vm_uuid - in - let (_ : string list) = - Device.Dm.suspend_vtpm task ~xs domid ~vtpm - in - () + ( with_tracing ~task ~name:"suspend_emu_manager Suspend" @@ fun () -> + do_suspend_callback () ; + if domain_type = `hvm then ( + let vm_uuid = Uuidx.to_string uuid in + debug "VM = %s; domid = %d; suspending qemu-dm" vm_uuid domid ; + Device.Dm.suspend task ~xs ~qemu_domid ~dm domid ; + if is_uefi then + let (_ : string) = + Device.Dm.suspend_varstored task ~xs domid ~vm_uuid + in + let (_ : string list) = + Device.Dm.suspend_vtpm task ~xs domid ~vtpm + in + () + ) ) ; - send_done cnx ; - wait_for_message () + send_done cnx ; wait_for_message () | Prepare x when x = "xenguest" -> debug "Writing Libxc header" ; write_header main_fd (Libxc, 0L) >>= fun () -> @@ -1740,6 +1741,7 @@ let suspend_emu_manager ~(task : Xenops_task.task_handle) ~xc:_ ~xs ~domain_type ) ) | Result _ -> + with_tracing ~task ~name:"suspend_emu_manager Result" @@ fun () -> debug "VM = %s; domid = %d; emu-manager completed successfully" (Uuidx.to_string uuid) domid ; return () diff --git a/ocaml/xenopsd/xc/dune b/ocaml/xenopsd/xc/dune index 1ee8a87e6e5..7cbdff2aa2c 100644 --- a/ocaml/xenopsd/xc/dune +++ b/ocaml/xenopsd/xc/dune @@ -51,6 +51,8 @@ xapi-stdext-std xapi-stdext-threads xapi-stdext-unix + xapi-tracing + xapi-tracing-export xapi_xenopsd xapi_xenopsd_c_stubs xapi_xenopsd_xc_c_stubs diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 071eae2e062..ba3dd7e2b8a 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -2499,15 +2499,35 @@ module VM = struct None in let manager_path = choose_emu_manager vm.Vm.platformdata in + with_tracing ~task 
~name:"VM_save_domain_suspend" @@ fun () -> Domain.suspend task ~xc ~xs ~domain_type ~dm:(dm_of ~vm) ~vtpm:(vtpm_of ~vm) ~progress_callback ~qemu_domid ~manager_path ~is_uefi vm_str domid fd vgpu_fd flags' (fun () -> + with_tracing ~task ~name:"VM_save_domain_suspend_callback" + @@ fun () -> (* SCTX-2558: wait more for ballooning if needed *) - wait_ballooning task vm ; - pre_suspend_callback task ; - if not (request_shutdown task vm Suspend 30.) then + ( with_tracing ~task ~name:"VM_save_wait_ballooning" @@ fun () -> + wait_ballooning task vm + ) ; + ( with_tracing ~task ~name:"VM_save_pre_suspend_callback" + @@ fun () -> pre_suspend_callback task + ) ; + + if + not + ( with_tracing ~task + ~name:"VM_save_domain_suspend_callback_request_shutdown" + @@ fun () -> request_shutdown task vm Suspend 30. + ) + then raise (Xenopsd_error Failed_to_acknowledge_suspend_request) ; - if not (wait_shutdown task vm Suspend 1200.) then + if + not + ( with_tracing ~task + ~name:"VM_save_domain_suspend_callback_wait_shutdown" + @@ fun () -> wait_shutdown task vm Suspend 1200. 
+ ) + then raise (Xenopsd_error (Failed_to_suspend (vm.Vm.id, 1200.))) ) ; (* Record the final memory usage of the domain so we know how much From 6587b75ce71caf1885f05bd063f613870feecd37 Mon Sep 17 00:00:00 2001 From: Aidan Allen Date: Thu, 30 Jan 2025 14:42:21 +0000 Subject: [PATCH 020/117] CP-53470 Rejoin orphaned spans forwarded to xenopsd to their traceparent Signed-off-by: Aidan Allen --- ocaml/xenopsd/lib/xenops_migrate.ml | 2 +- ocaml/xenopsd/lib/xenopsd.ml | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ocaml/xenopsd/lib/xenops_migrate.ml b/ocaml/xenopsd/lib/xenops_migrate.ml index db6b2f9e0e6..1121af83024 100644 --- a/ocaml/xenopsd/lib/xenops_migrate.ml +++ b/ocaml/xenopsd/lib/xenops_migrate.ml @@ -95,7 +95,7 @@ module Forwarded_http_request = struct type t = { uri: string ; query: (string * string) list - ; traceparent: string option + ; additional_headers: (string * string) list ; cookie: (string * string) list ; body: string option } diff --git a/ocaml/xenopsd/lib/xenopsd.ml b/ocaml/xenopsd/lib/xenopsd.ml index cb79fd20991..e0a4f5949db 100644 --- a/ocaml/xenopsd/lib/xenopsd.ml +++ b/ocaml/xenopsd/lib/xenopsd.ml @@ -356,7 +356,8 @@ let handle_received_fd this_connection = let context = {Xenops_server.transferred_fd= Some received_fd} in let uri = Uri.of_string req.Xenops_migrate.Forwarded_http_request.uri in let traceparent = - req.Xenops_migrate.Forwarded_http_request.traceparent + List.assoc_opt "traceparent" + req.Xenops_migrate.Forwarded_http_request.additional_headers in fn uri req.Xenops_migrate.Forwarded_http_request.cookie traceparent this_connection context From 391c8f44ee26ba0bc9d4a9c64686c5587ea66756 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Fri, 7 Feb 2025 16:10:03 +0000 Subject: [PATCH 021/117] CA-405971: check tgroups_enabled feature flag before classifying threads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There was a codepath here that ran 
even with the feature flag disabled. Signed-off-by: Edwin Török --- ocaml/xapi/server_helpers.ml | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/ocaml/xapi/server_helpers.ml b/ocaml/xapi/server_helpers.ml index b424036c0bf..6185c284b43 100644 --- a/ocaml/xapi/server_helpers.ml +++ b/ocaml/xapi/server_helpers.ml @@ -141,19 +141,22 @@ let do_dispatch ?session_id ?forward_op ?self:_ supports_async called_fn_name Context.of_http_req ?session_id ~internal_async_subtask ~generate_task_for ~supports_async ~label ~http_req ~fd () in - let identity = - try - Option.map - (fun session_id -> - let subject = - Db.Session.get_auth_user_sid ~__context ~self:session_id - in - Tgroup.Group.Identity.make ?user_agent:http_req.user_agent subject - ) - session_id - with _ -> None - in - Tgroup.of_creator (Tgroup.Group.Creator.make ?identity ()) ; + ( if !Xapi_globs.tgroups_enabled then + let identity = + try + Option.map + (fun session_id -> + let subject = + Db.Session.get_auth_user_sid ~__context ~self:session_id + in + Tgroup.Group.Identity.make ?user_agent:http_req.user_agent + subject + ) + session_id + with _ -> None + in + Tgroup.of_creator (Tgroup.Group.Creator.make ?identity ()) + ) ; let sync () = let need_complete = not (Context.forwarded_task __context) in exec_with_context ~__context ~need_complete ~called_async From 405bcb52b34fa29f9f841d5d4e4ad1925bb3066a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Fri, 7 Feb 2025 15:17:39 +0000 Subject: [PATCH 022/117] CA-405971: avoid calling DB functions when in emergency mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This breaks emergency mode commands. 
Signed-off-by: Edwin Török --- ocaml/xapi/server_helpers.ml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ocaml/xapi/server_helpers.ml b/ocaml/xapi/server_helpers.ml index 6185c284b43..77e1f02bfb9 100644 --- a/ocaml/xapi/server_helpers.ml +++ b/ocaml/xapi/server_helpers.ml @@ -152,7 +152,14 @@ let do_dispatch ?session_id ?forward_op ?self:_ supports_async called_fn_name Tgroup.Group.Identity.make ?user_agent:http_req.user_agent subject ) - session_id + ( if !Xapi_globs.slave_emergency_mode then + (* in emergency mode we cannot reach the coordinator, + and we must not attempt to make Db calls + *) + None + else + session_id + ) with _ -> None in Tgroup.of_creator (Tgroup.Group.Creator.make ?identity ()) From 97682ef3e2e6ffc598a13d52258c2fc184846605 Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Fri, 7 Feb 2025 12:00:00 +0100 Subject: [PATCH 023/117] Hugo docs: Support dark themes: Invert images to match the theme Signed-off-by: Bernhard Kaindl --- doc/layouts/partials/custom-footer.html | 94 +++++++++++++++++++++++++ doc/layouts/partials/menu-footer.html | 0 2 files changed, 94 insertions(+) create mode 100644 doc/layouts/partials/custom-footer.html delete mode 100644 doc/layouts/partials/menu-footer.html diff --git a/doc/layouts/partials/custom-footer.html b/doc/layouts/partials/custom-footer.html new file mode 100644 index 00000000000..62f35eeecd1 --- /dev/null +++ b/doc/layouts/partials/custom-footer.html @@ -0,0 +1,94 @@ + diff --git a/doc/layouts/partials/menu-footer.html b/doc/layouts/partials/menu-footer.html deleted file mode 100644 index e69de29bb2d..00000000000 From 88293b1d99d0c6c175e7b76bbebd2f282416cde3 Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Fri, 7 Feb 2025 12:00:00 +0100 Subject: [PATCH 024/117] Hugo docs: Support dark themes: Manual striping is obsolete Signed-off-by: Bernhard Kaindl --- doc/assets/css/misc.css | 4 ---- doc/layouts/partials/content.html | 3 ++- 2 files changed, 2 insertions(+), 5 
deletions(-) diff --git a/doc/assets/css/misc.css b/doc/assets/css/misc.css index beb5a28e43a..dad61421838 100644 --- a/doc/assets/css/misc.css +++ b/doc/assets/css/misc.css @@ -47,10 +47,6 @@ } -.table-striped > tbody > tr:nth-child(odd) { - background-color: #f9f9f9; -} - .btn { display: inline-block; padding: 6px 12px; diff --git a/doc/layouts/partials/content.html b/doc/layouts/partials/content.html index ebba286db1e..3700bf47032 100644 --- a/doc/layouts/partials/content.html +++ b/doc/layouts/partials/content.html @@ -216,7 +216,8 @@

Changes

- + +
{{ range . }} From 55238a3adfb6af7f500180055bb83d2204c8c74b Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Fri, 7 Feb 2025 12:00:00 +0100 Subject: [PATCH 025/117] Hugo docs: Support dark themes: Fix class reference tables Signed-off-by: Bernhard Kaindl --- doc/assets/css/xenapi.css | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/assets/css/xenapi.css b/doc/assets/css/xenapi.css index d75b1b6d089..10caf35ee63 100644 --- a/doc/assets/css/xenapi.css +++ b/doc/assets/css/xenapi.css @@ -42,6 +42,16 @@ th { text-align: left; .field, .field2 { margin: 0em 0; padding: .5em .7em .7em; + /** + * doc/layouts/partials/content.html generates tables with alternating + * field and field2 for the rows of the XenAPI Class Reference tables. + * Their background colours are hard-coded to bright colours here, but the + * colors are not adjusted for dark mode. We cannot use the theme colours + * in this case. Thus we have to hard-code the colours for now. Ergo, also + * hard-code the text colour to ensure that it has contrast in dark mode too. + * Only shades of grey are used, so the text colour is hard-coded to black. 
+ */ + color: black; background-color: #dddddd; cursor: pointer; font-size: 15px; From 29380f6bc9ad9cd8aaeb9f5f57c472dd3442332c Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Fri, 7 Feb 2025 12:00:00 +0100 Subject: [PATCH 026/117] Hugo docs: Allow for quick doc updates using the edit button Signed-off-by: Bernhard Kaindl --- doc/hugo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/hugo.toml b/doc/hugo.toml index 7b2dff698b4..823de1113af 100644 --- a/doc/hugo.toml +++ b/doc/hugo.toml @@ -29,6 +29,7 @@ home = [ "HTML", "RSS", "PRINT"] section = [ "HTML", "RSS", "PRINT"] [params] +editURL = 'https://github.com/xapi-project/xen-api/edit/master/doc/content/${FilePath}' # Enable the theme variant selector, default to auto: themeVariant = [ "auto", From 558eaca4ae007b18f154b0e25b75555919f15546 Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Fri, 7 Feb 2025 12:00:00 +0100 Subject: [PATCH 027/117] Hugo docs: Give images a very smooth shadow, not a flashy border Signed-off-by: Bernhard Kaindl --- doc/hugo.toml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/doc/hugo.toml b/doc/hugo.toml index 7b2dff698b4..33a62bcaf6b 100644 --- a/doc/hugo.toml +++ b/doc/hugo.toml @@ -47,3 +47,26 @@ themeVariant = [ themeVariantAuto = ["red", "zen-dark"] alwaysopen = false collapsibleMenu = true + + [params.imageEffects] + + # + # Enable a soft shadow around the images that make the images appear to + # stand out ever so slightly like paper on a desk, giving them a smooth look: + # + shadow = true + + # + # The CSS-based photographer's lightbox makes the image border flash + # on mouse-over and darkens the rest of the page when clicking on images. + # + # It is better to disable it as it serves no proper function for the + # toolstack docs and causes a border around the image to appear/disappear + # in a flash when entering/leaving the image. Disabling it turns the sudden + # appearance and disappearance of the flashy border off. 
+ # + # Initially, this was based on the Featherlight jQuery plugin, which would + # have enlarged the images, but the CSS-only solution appears inadequate + # for a proper lightbox as it does not zoom the image: + # + lightbox = false From ac3430f2ea861b79e1969b54e84602325df9e16e Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Sat, 8 Feb 2025 12:00:00 +0100 Subject: [PATCH 028/117] README: Submission: Add DCO, issues & remove the disabled xen-api list Signed-off-by: Bernhard Kaindl --- README.markdown | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/README.markdown b/README.markdown index 37174144a3e..f83f3de470b 100644 --- a/README.markdown +++ b/README.markdown @@ -99,18 +99,26 @@ git push origin --tags Contributions ------------- -To contribute patches to xen-api, please fork the repository on -Github, and then submit a pull request. If for some reason you can't -use Github to submit a pull request, then you may send your patch for -review to the [xen-api@lists.xenproject.org mailing list](http://www.xenproject.org/help/mailing-list.html), with a link to a -public git repository for review. We much prefer Github pull requests, -however, and submitting a patch to the mailing list will take much -more time for review. - -Maintainers +To contribute changes to xen-api, please fork the repository on +GitHub, and then submit a pull request. + +It is required to add a `Signed-off-by:` as a +[Developers Certificate of Origin](http://developercertificate.org). 
+It certifies the patch's origin and is licensed under an +appropriate open-source licence to include it in Xapi: +https://git-scm.com/docs/git-commit#Documentation/git-commit.txt---signoff + +Discussions ----------- -Maintainers can be contacted via this mailing list: `xen-api@lists.xenproject.org` +Discussions can be started at +https://github.com/xapi-project/xen-api/discussions + +Issues +------ + +Issues can be raised at +https://github.com/xapi-project/xen-api/issues Licensing --------- From 60f4aa9000334efd27c402935bff233731d7deef Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Mon, 10 Feb 2025 12:00:00 +0100 Subject: [PATCH 029/117] docs/xenopsd: List the child pages using the children shortcode Signed-off-by: Bernhard Kaindl --- doc/content/xenopsd/design/_index.md | 5 ++++- doc/content/xenopsd/walkthroughs/_index.md | 6 ++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/doc/content/xenopsd/design/_index.md b/doc/content/xenopsd/design/_index.md index a55a9b124b7..2047d068ad5 100644 --- a/doc/content/xenopsd/design/_index.md +++ b/doc/content/xenopsd/design/_index.md @@ -1,3 +1,6 @@ +++ title = "Design" -+++ \ No newline at end of file ++++ + +Design documents for `xenopsd`: +{{% children %}} diff --git a/doc/content/xenopsd/walkthroughs/_index.md b/doc/content/xenopsd/walkthroughs/_index.md index d54568dcbbf..2217b209ab6 100644 --- a/doc/content/xenopsd/walkthroughs/_index.md +++ b/doc/content/xenopsd/walkthroughs/_index.md @@ -6,8 +6,10 @@ linkTitle = "Walk-throughs" Let's trace through interesting operations to see how the whole system works. 
-- [Starting a VM](VM.start.md) -- [Migrating a VM](VM.migrate.md) +{{% children %}} + +Inspiration for other walk-throughs: + - Shutting down a VM and waiting for it to happen - A VM wants to reboot itself - A disk is hotplugged From c2fdee1a8bfe7970de57c5b3529735a29fc35505 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Mon, 10 Feb 2025 16:00:06 +0000 Subject: [PATCH 030/117] CA-405820 guard /etc/init.d/functions in xe-toolstack-restart The sourced file does not exist in XS9. I believe we are not using any of the functionality provided but to be safe, keep it in XS8 for now. Signed-off-by: Christian Lindig --- scripts/xe-toolstack-restart | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/xe-toolstack-restart b/scripts/xe-toolstack-restart index 55e82e8f3d8..57f3675e9c3 100755 --- a/scripts/xe-toolstack-restart +++ b/scripts/xe-toolstack-restart @@ -11,7 +11,7 @@ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # -. /etc/init.d/functions +test -f /etc/init.d/functions && source /etc/init.d/functions FILENAME=`basename $0` LOCKFILE='/dev/shm/xe_toolstack_restart.lock' From 383e63fcb4b256a7a229144f91195e4fbf39f3e4 Mon Sep 17 00:00:00 2001 From: Colin James Date: Tue, 11 Feb 2025 11:13:13 +0000 Subject: [PATCH 031/117] Simplify cases of may_be_side_effecting Use nested disjunctive patterns. 
Signed-off-by: Colin James --- ocaml/idl/ocaml_backend/gen_server.ml | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/ocaml/idl/ocaml_backend/gen_server.ml b/ocaml/idl/ocaml_backend/gen_server.ml index 31e2bbe16f2..52311a3009f 100644 --- a/ocaml/idl/ocaml_backend/gen_server.ml +++ b/ocaml/idl/ocaml_backend/gen_server.ml @@ -286,16 +286,12 @@ let operation (obj : obj) (x : message) = in let may_be_side_effecting msg = match msg.msg_tag with - | FromField (Setter, _) | FromField (Add, _) | FromField (Remove, _) -> + | Custom + | FromField ((Setter | Add | Remove), _) + | FromObject (Make | Delete | Private Copy) -> true - | FromField _ -> + | FromObject _ | FromField (Getter, _) -> false - | FromObject Make | FromObject Delete | FromObject (Private Copy) -> - true - | FromObject _ -> - false - | Custom -> - true in let session_check_exp = if x.msg_session then From e3ffebbfdeaaaaa44fa8e4646f927d360e22619f Mon Sep 17 00:00:00 2001 From: Colin James Date: Tue, 11 Feb 2025 11:13:42 +0000 Subject: [PATCH 032/117] Drop count_mandatory_message_parameters The function count_mandatory_message_parameters is dropped in favour of computing the value in-place from information already available. The original function is just an inlined left fold. 
Signed-off-by: Colin James --- ocaml/idl/ocaml_backend/gen_server.ml | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/ocaml/idl/ocaml_backend/gen_server.ml b/ocaml/idl/ocaml_backend/gen_server.ml index 52311a3009f..c7782b5f626 100644 --- a/ocaml/idl/ocaml_backend/gen_server.ml +++ b/ocaml/idl/ocaml_backend/gen_server.ml @@ -57,18 +57,6 @@ let has_default_args args = Code to generate a single operation in server dispatcher ------------------------------------------------------------------------------------------ *) -let count_mandatory_message_parameters (msg : message) = - (* Returns the number of mandatory parameters of a message *) - let rec count_mandatory_parameters (params : param list) = - match params with - | [] -> - 0 - | head :: tail -> - (match head.param_default with None -> 1 | Some _ -> 0) - + count_mandatory_parameters tail - in - count_mandatory_parameters msg.msg_params - let operation (obj : obj) (x : message) = let msg_params = x.DT.msg_params in let msg_params_with_default_values, msg_params_without_default_values = @@ -428,7 +416,7 @@ let operation (obj : obj) (x : message) = ^ " | _ ->\n" ^ " Server_helpers.parameter_count_mismatch_failure __call " ^ "\"" - ^ string_of_int (count_mandatory_message_parameters x) + ^ string_of_int (List.length msg_params_without_default_values) ^ "\"" ^ " (string_of_int ((List.length __params) - " ^ (if x.msg_session then "1" else "0") From ce4b1c8cb3f802cb0c4ddf748249171188a2ee0b Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Tue, 11 Feb 2025 16:52:50 +0000 Subject: [PATCH 033/117] message-switch/unix: reuse stdext's Mutex.lock More importantly, Mutex.unlock exception's weren't handled Signed-off-by: Pau Ruiz Safont --- ocaml/message-switch/unix/protocol_unix.ml | 47 ++++++------------- .../unix/protocol_unix_scheduler.ml | 24 +++------- 2 files changed, 21 insertions(+), 50 deletions(-) diff --git a/ocaml/message-switch/unix/protocol_unix.ml 
b/ocaml/message-switch/unix/protocol_unix.ml index 7e4432a28f2..5c20a5b86f2 100644 --- a/ocaml/message-switch/unix/protocol_unix.ml +++ b/ocaml/message-switch/unix/protocol_unix.ml @@ -16,12 +16,7 @@ open Message_switch_core.Protocol -let with_lock m f = - Mutex.lock m ; - try - let r = f () in - Mutex.unlock m ; r - with e -> Mutex.unlock m ; raise e +let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute let thread_forever f v = let rec loop () = @@ -133,14 +128,6 @@ module IO = struct ) end - module Mutex = struct - type t = Mutex.t - - let create = Mutex.create - - let with_lock = with_lock - end - module Clock = struct type _timer = Protocol_unix_scheduler.t @@ -239,7 +226,7 @@ module Client = struct type t = { mutable requests_conn: IO.ic * IO.oc ; mutable events_conn: IO.ic * IO.oc - ; requests_m: IO.Mutex.t + ; requests_m: Mutex.t ; wakener: ( Message_switch_core.Protocol.message_id , Message_switch_core.Protocol.Message.t result IO.Ivar.t @@ -264,7 +251,7 @@ module Client = struct reconnect () >>|= fun (requests_conn, events_conn) -> let wakener = Hashtbl.create 10 in do_rpc requests_conn (In.CreateTransient token) >>|= fun reply_queue_name -> - let requests_m = IO.Mutex.create () in + let requests_m = Mutex.create () in let t = {requests_conn; events_conn; requests_m; wakener; reply_queue_name} in @@ -292,7 +279,7 @@ module Client = struct Error e | Ok (), i, m -> (* If the Ack doesn't belong to us then assume it's another thread *) - IO.Mutex.with_lock requests_m (fun () -> + with_lock requests_m (fun () -> match m.Message.kind with | Message.Response j -> ( match Hashtbl.find_opt wakener j with @@ -334,7 +321,7 @@ module Client = struct let c = ref None in let m = Mutex.create () in fun ~switch () -> - IO.Mutex.with_lock m (fun () -> + with_lock m (fun () -> match !c with | Some x -> Ok x @@ -388,44 +375,44 @@ module Client = struct | Ok response -> (* release resources *) Option.iter IO.Clock.cancel timer ; - IO.Mutex.with_lock c.requests_m (fun 
() -> Hashtbl.remove c.wakener id) ; + with_lock c.requests_m (fun () -> Hashtbl.remove c.wakener id) ; Ok response.Message.payload | Error e -> Error e let list ~t:c ~prefix ?(filter = `All) () = - IO.Mutex.with_lock c.requests_m (fun () -> + with_lock c.requests_m (fun () -> do_rpc c.requests_conn (In.List (prefix, filter)) >>|= fun result -> Ok (Out.string_list_of_rpc (Jsonrpc.of_string result)) ) let ack ~t:c ~message:(name, id) () = - IO.Mutex.with_lock c.requests_m (fun () -> + with_lock c.requests_m (fun () -> do_rpc c.requests_conn (In.Ack (name, id)) >>|= fun (_ : string) -> Ok () ) let diagnostics ~t:c () = - IO.Mutex.with_lock c.requests_m (fun () -> + with_lock c.requests_m (fun () -> do_rpc c.requests_conn In.Diagnostics >>|= fun (result : string) -> Ok (Diagnostics.t_of_rpc (Jsonrpc.of_string result)) ) let trace ~t:c ?(from = 0L) ?(timeout = 0.) () = - IO.Mutex.with_lock c.requests_m (fun () -> + with_lock c.requests_m (fun () -> do_rpc c.requests_conn (In.Trace (from, timeout)) >>|= fun (result : string) -> Ok (Out.trace_of_rpc (Jsonrpc.of_string result)) ) let shutdown ~t:c () = - IO.Mutex.with_lock c.requests_m (fun () -> + with_lock c.requests_m (fun () -> do_rpc c.requests_conn In.Shutdown >>|= fun (_ : string) -> IO.IO.return (Ok ()) ) let destroy ~t ~queue:queue_name () = - IO.Mutex.with_lock t.requests_m (fun () -> + with_lock t.requests_m (fun () -> do_rpc t.requests_conn (In.Destroy queue_name) >>|= fun (_ : string) -> IO.IO.return (Ok ()) ) @@ -487,7 +474,7 @@ module Server = struct let frame = In.Transfer transfer in do_rpc request_conn frame >>= function | Error _e -> - IO.Mutex.with_lock mutex (fun () -> + with_lock mutex (fun () -> IO.disconnect request_conn ; IO.disconnect reply_conn ; reconnect () @@ -523,16 +510,12 @@ module Server = struct } ) in - IO.Mutex.with_lock mutex (fun () -> - do_rpc reply_conn request - ) + with_lock mutex (fun () -> do_rpc reply_conn request) >>= fun _ -> return () ) >>= fun () -> let request = In.Ack i 
in - IO.Mutex.with_lock mutex (fun () -> - do_rpc reply_conn request - ) + with_lock mutex (fun () -> do_rpc reply_conn request) >>= fun _ -> () ) () diff --git a/ocaml/message-switch/unix/protocol_unix_scheduler.ml b/ocaml/message-switch/unix/protocol_unix_scheduler.ml index 3eaeb83218c..2d87711753f 100644 --- a/ocaml/message-switch/unix/protocol_unix_scheduler.ml +++ b/ocaml/message-switch/unix/protocol_unix_scheduler.ml @@ -14,19 +14,7 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. *) -let finally' f g = - try - let result = f () in - g () ; result - with e -> g () ; raise e - -module Mutex = struct - include Mutex - - let execute m f = - lock m ; - finally' f (fun () -> unlock m) -end +let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute module Int64Map = Map.Make (struct type t = int64 @@ -59,7 +47,7 @@ module Dump = struct let make () = let now = now () in - Mutex.execute m (fun () -> + with_lock m (fun () -> Int64Map.fold (fun time xs acc -> List.map (fun i -> {time= Int64.sub time now; thing= i.name}) xs @@ -78,7 +66,7 @@ let one_shot time (name : string) f = Int64.(add (of_int x) (now ())) in let id = - Mutex.execute m (fun () -> + with_lock m (fun () -> let existing = if Int64Map.mem time !schedule then Int64Map.find time !schedule @@ -96,7 +84,7 @@ let one_shot time (name : string) f = (time, id) let cancel (time, id) = - Mutex.execute m (fun () -> + with_lock m (fun () -> let existing = if Int64Map.mem time !schedule then Int64Map.find time !schedule @@ -110,7 +98,7 @@ let cancel (time, id) = let process_expired () = let t = now () in let expired = - Mutex.execute m (fun () -> + with_lock m (fun () -> let expired, unexpired = Int64Map.partition (fun t' _ -> t' <= t) !schedule in @@ -129,7 +117,7 @@ let rec main_loop () = () done ; let sleep_until = - Mutex.execute m (fun () -> + with_lock m (fun () -> try Int64Map.min_binding !schedule |> fst with Not_found -> Int64.add 3600L (now ()) ) From 
542d99afe0dbef71f29adb89d886509f4c896e9a Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Tue, 11 Feb 2025 17:12:11 +0000 Subject: [PATCH 034/117] message-switch/unix: Remove module from Cohttp's IO one The Ivar and CLock modules are unrelated to Cohttp's IO, so move them out. While doing this, add an interface for both. Signed-off-by: Pau Ruiz Safont --- ocaml/message-switch/unix/protocol_unix.ml | 110 ++++++++++++--------- 1 file changed, 63 insertions(+), 47 deletions(-) diff --git a/ocaml/message-switch/unix/protocol_unix.ml b/ocaml/message-switch/unix/protocol_unix.ml index 5c20a5b86f2..710d2d70d35 100644 --- a/ocaml/message-switch/unix/protocol_unix.ml +++ b/ocaml/message-switch/unix/protocol_unix.ml @@ -28,6 +28,61 @@ let thread_forever f v = in Thread.create loop () +(** "Immutable variables" are a single-element container that can only be set + once. Reading the content blocks until they hold a value. *) +module Ivar : sig + type 'a t + + val create : unit -> 'a t + + val fill : 'a t -> 'a -> unit + + val read : 'a t -> 'a +end = struct + type 'a t = {mutable v: 'a option; m: Mutex.t; c: Condition.t} + + let create () = {v= None; m= Mutex.create (); c= Condition.create ()} + + let fill r x = + with_lock r.m (fun () -> + r.v <- Some x ; + Condition.signal r.c + ) + + let read r = + with_lock r.m (fun () -> + while r.v = None do + Condition.wait r.c r.m + done ; + match r.v with Some x -> x | None -> assert false + ) +end + +module Clock : sig + type t + + val run_after : int -> (unit -> unit) -> t + + val cancel : t -> unit +end = struct + type t = Protocol_unix_scheduler.t + + let started = ref false + + let started_m = Mutex.create () + + let run_after timeout f = + with_lock started_m (fun () -> + if not !started then ( + Protocol_unix_scheduler.start () ; + started := true + ) + ) ; + Protocol_unix_scheduler.(one_shot (Delta timeout) "rpc" f) + + let cancel = Protocol_unix_scheduler.cancel +end + module IO = struct let whoami () = Printf.sprintf 
"%s:%d" (Filename.basename Sys.argv.(0)) (Unix.getpid ()) @@ -107,45 +162,6 @@ module IO = struct end include IO - - module Ivar = struct - type 'a t = {mutable v: 'a option; m: Mutex.t; c: Condition.t} - - let create () = {v= None; m= Mutex.create (); c= Condition.create ()} - - let fill r x = - with_lock r.m (fun () -> - r.v <- Some x ; - Condition.signal r.c - ) - - let read r = - with_lock r.m (fun () -> - while r.v = None do - Condition.wait r.c r.m - done ; - match r.v with Some x -> x | None -> assert false - ) - end - - module Clock = struct - type _timer = Protocol_unix_scheduler.t - - let started = ref false - - let started_m = Mutex.create () - - let run_after timeout f = - with_lock started_m (fun () -> - if not !started then ( - Protocol_unix_scheduler.start () ; - started := true - ) - ) ; - Protocol_unix_scheduler.(one_shot (Delta timeout) "rpc" f) - - let cancel = Protocol_unix_scheduler.cancel - end end module Connection = Message_switch_core.Make.Connection (IO) @@ -229,7 +245,7 @@ module Client = struct ; requests_m: Mutex.t ; wakener: ( Message_switch_core.Protocol.message_id - , Message_switch_core.Protocol.Message.t result IO.Ivar.t + , Message_switch_core.Protocol.Message.t result Ivar.t ) Hashtbl.t ; reply_queue_name: string @@ -286,7 +302,7 @@ module Client = struct | Some x -> do_rpc t.events_conn (In.Ack i) >>|= fun (_ : string) -> - IO.Ivar.fill x (Ok m) ; Ok () + Ivar.fill x (Ok m) ; Ok () | None -> Printf.printf "no wakener for id %s,%Ld\n%!" 
(fst i) (snd i) ; @@ -332,12 +348,12 @@ module Client = struct ) let rpc ~t:c ~queue:dest_queue_name ?timeout ~body:x () = - let t = IO.Ivar.create () in + let t = Ivar.create () in let timer = Option.map (fun timeout -> - IO.Clock.run_after timeout (fun () -> - IO.Ivar.fill t (Error (`Message_switch `Timeout)) + Clock.run_after timeout (fun () -> + Ivar.fill t (Error (`Message_switch `Timeout)) ) ) timeout @@ -371,10 +387,10 @@ module Client = struct in loop () >>|= fun id -> (* now block waiting for our response *) - match IO.Ivar.read t with + match Ivar.read t with | Ok response -> (* release resources *) - Option.iter IO.Clock.cancel timer ; + Option.iter Clock.cancel timer ; with_lock c.requests_m (fun () -> Hashtbl.remove c.wakener id) ; Ok response.Message.payload | Error e -> @@ -465,7 +481,7 @@ module Server = struct in reconnect () >>|= fun ((request_conn, _reply_conn) as connections) -> (* Only allow one reply RPC at a time (no pipelining) *) - let mutex = IO.Mutex.create () in + let mutex = Mutex.create () in do_rpc request_conn (In.CreatePersistent name) >>|= fun (_ : string) -> let rec loop ((request_conn, reply_conn) as connections) from = let transfer = From 19704ac31003953d72f4e95f8274a5a61019b7f8 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Tue, 11 Feb 2025 17:20:55 +0000 Subject: [PATCH 035/117] message-switch/unix: simplify the scheduler Only the relative time was ever used, a module was unused, and types could be simpler. Added an interface to make this clear. 
Signed-off-by: Pau Ruiz Safont --- ocaml/message-switch/unix/dune | 1 - ocaml/message-switch/unix/protocol_unix.ml | 4 +- .../unix/protocol_unix_scheduler.ml | 42 +++---------------- .../unix/protocol_unix_scheduler.mli | 5 +++ quality-gate.sh | 2 +- 5 files changed, 13 insertions(+), 41 deletions(-) create mode 100644 ocaml/message-switch/unix/protocol_unix_scheduler.mli diff --git a/ocaml/message-switch/unix/dune b/ocaml/message-switch/unix/dune index 4e792493866..92bddfd66fb 100644 --- a/ocaml/message-switch/unix/dune +++ b/ocaml/message-switch/unix/dune @@ -14,6 +14,5 @@ xapi-stdext-threads xapi-stdext-unix ) - (preprocess (per_module ((pps ppx_deriving_rpc) Protocol_unix_scheduler))) ) diff --git a/ocaml/message-switch/unix/protocol_unix.ml b/ocaml/message-switch/unix/protocol_unix.ml index 710d2d70d35..f7aa0802c0f 100644 --- a/ocaml/message-switch/unix/protocol_unix.ml +++ b/ocaml/message-switch/unix/protocol_unix.ml @@ -65,7 +65,7 @@ module Clock : sig val cancel : t -> unit end = struct - type t = Protocol_unix_scheduler.t + type t = int64 * int let started = ref false @@ -78,7 +78,7 @@ end = struct started := true ) ) ; - Protocol_unix_scheduler.(one_shot (Delta timeout) "rpc" f) + Protocol_unix_scheduler.run_after ~seconds:timeout f let cancel = Protocol_unix_scheduler.cancel end diff --git a/ocaml/message-switch/unix/protocol_unix_scheduler.ml b/ocaml/message-switch/unix/protocol_unix_scheduler.ml index 2d87711753f..c05b9468eed 100644 --- a/ocaml/message-switch/unix/protocol_unix_scheduler.ml +++ b/ocaml/message-switch/unix/protocol_unix_scheduler.ml @@ -16,15 +16,10 @@ let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute -module Int64Map = Map.Make (struct - type t = int64 - - let compare = compare -end) - +module Int64Map = Map.Make (Int64) module Delay = Xapi_stdext_threads.Threadext.Delay -type item = {id: int; name: string; fn: unit -> unit} +type item = {id: int; fn: unit -> unit} let schedule = ref Int64Map.empty @@ -34,37 +29,10 @@ let 
next_id = ref 0 let m = Mutex.create () -type time = Absolute of int64 | Delta of int [@@deriving rpc] - -type t = int64 * int [@@deriving rpc] - let now () = Unix.gettimeofday () |> ceil |> Int64.of_float -module Dump = struct - type u = {time: int64; thing: string} [@@deriving rpc] - - type t = u list [@@deriving rpc] - - let make () = - let now = now () in - with_lock m (fun () -> - Int64Map.fold - (fun time xs acc -> - List.map (fun i -> {time= Int64.sub time now; thing= i.name}) xs - @ acc - ) - !schedule [] - ) -end - -let one_shot time (name : string) f = - let time = - match time with - | Absolute x -> - x - | Delta x -> - Int64.(add (of_int x) (now ())) - in +let run_after ~seconds f = + let time = Int64.(add (of_int seconds) (now ())) in let id = with_lock m (fun () -> let existing = @@ -75,7 +43,7 @@ let one_shot time (name : string) f = in let id = !next_id in incr next_id ; - let item = {id; name; fn= f} in + let item = {id; fn= f} in schedule := Int64Map.add time (item :: existing) !schedule ; Delay.signal delay ; id diff --git a/ocaml/message-switch/unix/protocol_unix_scheduler.mli b/ocaml/message-switch/unix/protocol_unix_scheduler.mli new file mode 100644 index 00000000000..0759c77ddea --- /dev/null +++ b/ocaml/message-switch/unix/protocol_unix_scheduler.mli @@ -0,0 +1,5 @@ +val run_after : seconds:int -> (unit -> unit) -> int64 * int + +val cancel : int64 * int -> unit + +val start : unit -> unit diff --git a/quality-gate.sh b/quality-gate.sh index e59b8e40ccb..0b03f9b29a8 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -25,7 +25,7 @@ verify-cert () { } mli-files () { - N=497 + N=496 X="ocaml/tests" X+="|ocaml/quicktest" X+="|ocaml/message-switch/core_test" From 66e9509ceecffdf991efe4aa092dd0b30f69ad22 Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Wed, 12 Feb 2025 12:00:00 +0100 Subject: [PATCH 036/117] doc/hugo.toml: Use the theme font for mermaid diagrams too Signed-off-by: Bernhard Kaindl --- doc/hugo.toml | 3 +++ 1 file changed, 3 
insertions(+) diff --git a/doc/hugo.toml b/doc/hugo.toml index 9c58a1eea17..a35112db945 100644 --- a/doc/hugo.toml +++ b/doc/hugo.toml @@ -46,6 +46,9 @@ themeVariant = [ ] # auto switches between "red" and "zen-dark" depending on the browser/OS dark mode: themeVariantAuto = ["red", "zen-dark"] +# Consistency: Use the font of the Hugo Relearn theme also for Mermaid diagrams: +# securityLevel=loose is the default of Relearn, it allows HTML links in diagrams: +mermaidInitialize = '{ "fontFamily": "Roboto Flex", "securityLevel": "loose" }' alwaysopen = false collapsibleMenu = true From f58c32aa735d991765d3c8cb0d73993091507496 Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Wed, 12 Feb 2025 12:00:00 +0100 Subject: [PATCH 037/117] docs: Add dedicated walk-throughs for VM.build and xenguest Signed-off-by: Bernhard Kaindl --- .../walkthroughs/VM.build/Domain.build.md | 137 +++++++++++++ .../xenopsd/walkthroughs/VM.build/VM_build.md | 58 ++++++ .../xenopsd/walkthroughs/VM.build/_index.md | 24 +++ .../xenopsd/walkthroughs/VM.build/xenguest.md | 184 ++++++++++++++++++ doc/content/xenopsd/walkthroughs/VM.start.md | 140 ++++--------- 5 files changed, 436 insertions(+), 107 deletions(-) create mode 100644 doc/content/xenopsd/walkthroughs/VM.build/Domain.build.md create mode 100644 doc/content/xenopsd/walkthroughs/VM.build/VM_build.md create mode 100644 doc/content/xenopsd/walkthroughs/VM.build/_index.md create mode 100644 doc/content/xenopsd/walkthroughs/VM.build/xenguest.md diff --git a/doc/content/xenopsd/walkthroughs/VM.build/Domain.build.md b/doc/content/xenopsd/walkthroughs/VM.build/Domain.build.md new file mode 100644 index 00000000000..8514e13eefd --- /dev/null +++ b/doc/content/xenopsd/walkthroughs/VM.build/Domain.build.md @@ -0,0 +1,137 @@ +--- +title: Domain.build +description: + "Prepare the build of a VM: Wait for scrubbing, do NUMA placement, run xenguest." 
+--- + +## Overview + +```mermaid +flowchart LR +subgraph xenopsd VM_build[ + xenopsd thread pool with two VM_build micro#8209;ops: + During parallel VM_start, Many threads run this in parallel! +] +direction LR +build_domain_exn[ + VM.build_domain_exn + from thread pool Thread #1 +] --> Domain.build +Domain.build --> build_pre +build_pre --> wait_xen_free_mem +build_pre -->|if NUMA/Best_effort| numa_placement +Domain.build --> xenguest[Invoke xenguest] +click Domain.build "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L1111-L1210" _blank +click build_domain_exn "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L2222-L2225" _blank +click wait_xen_free_mem "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L236-L272" _blank +click numa_placement "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L862-L897" _blank +click build_pre "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L899-L964" _blank +click xenguest "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L1139-L1146" _blank + +build_domain_exn2[ + VM.build_domain_exn + from thread pool Thread #2] --> Domain.build2[Domain.build] +Domain.build2 --> build_pre2[build_pre] +build_pre2 --> wait_xen_free_mem2[wait_xen_free_mem] +build_pre2 -->|if NUMA/Best_effort| numa_placement2[numa_placement] +Domain.build2 --> xenguest2[Invoke xenguest] +click Domain.build2 "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L1111-L1210" _blank +click build_domain_exn2 "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L2222-L2225" _blank +click wait_xen_free_mem2 "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L236-L272" _blank +click numa_placement2 "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L862-L897" 
_blank +click build_pre2 "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L899-L964" _blank +click xenguest2 "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L1139-L1146" _blank +end +``` + +[`VM.build_domain_exn`](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L2024-L2248) +[calls](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L2222-L2225) +[`Domain.build`](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L1111-L1210) +to call: +- `build_pre` to prepare the build of a VM: + - If the `xe` config `numa_placement` is set to `Best_effort`, invoke the NUMA placement algorithm. + - Run `xenguest` +- `xenguest` to invoke the [xenguest](xenguest) program to setup the domain's system memory. + +## Domain Build Preparation using build_pre + +[`Domain.build`](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L1111-L1210) +[calls](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L1137) +the [function `build_pre`](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L899-L964) +(which is also used for VM restore). It must: + +1. [Call](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L902-L911) + [wait_xen_free_mem](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L236-L272) + to wait, if necessary, for the Xen memory scrubber to catch up reclaiming memory (CA-39743) +2. Call the hypercall to set the timer mode +3. Call the hypercall to set the number of vCPUs +4. 
As described in the [NUMA feature description](../../toolstack/features/NUMA), + when the `xe` configuration option `numa_placement` is set to `Best_effort`, + except when the VM has a hard affinity set, invoke the `numa_placement` function: + + ```ml + match !Xenops_server.numa_placement with + | Any -> + () + | Best_effort -> + log_reraise (Printf.sprintf "NUMA placement") (fun () -> + if has_hard_affinity then + D.debug "VM has hard affinity set, skipping NUMA optimization" + else + numa_placement domid ~vcpus + ~memory:(Int64.mul memory.xen_max_mib 1048576L) + ) + ``` + +## NUMA placement + +`build_pre` passes the `domid`, the number of `vCPUs` and `xen_max_mib` to the +[numa_placement](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L862-L897) +function to run the algorithm to find the best NUMA placement. + +When it returns a NUMA node to use, it calls the Xen hypercalls +to set the vCPU affinity to this NUMA node: + +```ml + let vm = NUMARequest.make ~memory ~vcpus in + let nodea = + match !numa_resources with + | None -> + Array.of_list nodes + | Some a -> + Array.map2 NUMAResource.min_memory (Array.of_list nodes) a + in + numa_resources := Some nodea ; + Softaffinity.plan ~vm host nodea +``` + +By using the default `auto_node_affinity` feature of Xen, +setting the vCPU affinity causes the Xen hypervisor to activate +NUMA node affinity for memory allocations to be aligned with +the vCPU affinity of the domain. + +Note: See the Xen domain's +[auto_node_affinity](https://wiki.xenproject.org/wiki/NUMA_node_affinity_in_the_Xen_hypervisor) +feature flag, which controls this, which can be overridden in the +Xen hypervisor if needed for specific VMs. + +This can be used, for example, when there might not be enough memory +on the preferred NUMA node, but there are other NUMA nodes that have +enough free memory among with the memory allocations shall be done. 
+ +In terms of future NUMA design, it might be even more favourable to +have a strategy in `xenguest` where in such cases, the superpages +of the preferred node are used first and a fallback to neighbouring +NUMA nodes only happens to the extent necessary. + +Likely, the future allocation strategy should be passed to `xenguest` +using Xenstore like the other platform parameters for the VM. + +Summary: This passes the information to the hypervisor that memory +allocation for this domain should preferably be done from this NUMA node. + +## Invoke the xenguest program + +With the preparation in `build_pre` completed, `Domain.build` +[calls](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L1127-L1155) +the `xenguest` function to invoke the [xenguest](xenguest) program to build the domain. diff --git a/doc/content/xenopsd/walkthroughs/VM.build/VM_build.md b/doc/content/xenopsd/walkthroughs/VM.build/VM_build.md new file mode 100644 index 00000000000..c488d9b7c1c --- /dev/null +++ b/doc/content/xenopsd/walkthroughs/VM.build/VM_build.md @@ -0,0 +1,58 @@ +--- +title: VM_build micro-op +linkTitle: VM_build μ-op +description: Overview of the VM_build μ-op (runs after the VM_create μ-op created the domain). +weight: 10 +--- + +## Overview + +On Xen, `Xenctrl.domain_create` creates an empty domain and +returns the domain ID (`domid`) of the new domain to `xenopsd`. + +In the `build` phase, the `xenguest` program is called to create +the system memory layout of the domain, set vCPU affinity and a +lot more. 
+ +The [VM_build](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/lib/xenops_server.ml#L2255-L2271) +micro-op collects the VM build parameters and calls +[VM.build](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L2290-L2291), +which calls +[VM.build_domain](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L2250-L2288), +which calls +[VM.build_domain_exn](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L2024-L2248) +which calls [Domain.build](Domain.build): + +```mermaid +flowchart +subgraph xenopsd VM_build[xenopsd VM_build micro#8209;op] +direction LR +VM_build --> VM.build +VM.build --> VM.build_domain +VM.build_domain --> VM.build_domain_exn +VM.build_domain_exn --> Domain.build +click VM_build "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/lib/xenops_server.ml#L2255-L2271" _blank +click VM.build "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L2290-L2291" _blank +click VM.build_domain "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L2250-L2288" _blank +click VM.build_domain_exn "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L2024-L2248" _blank +click Domain.build "../Domain.build/index.html" +end +``` + +The function +[VM.build_domain_exn](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L2024) +must: + +1. Run pygrub (or eliloader) to extract the kernel and initrd, if necessary +2. [Call](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L2222-L2225) + [Domain.build](Domain.build) + to: + - optionally run NUMA placement and + - invoke [xenguest](VM.build/xenguest) to set up the domain memory. + + See the walk-though on [VM.build](VM.build) for more details on this phase. +3. 
Apply the `cpuid` configuration +4. Store the current domain configuration on disk -- it's important to know + the difference between the configuration you started with and the configuration + you would use after a reboot because some properties (such as maximum memory + and vCPUs) as fixed on create. diff --git a/doc/content/xenopsd/walkthroughs/VM.build/_index.md b/doc/content/xenopsd/walkthroughs/VM.build/_index.md new file mode 100644 index 00000000000..0a5d73d70cf --- /dev/null +++ b/doc/content/xenopsd/walkthroughs/VM.build/_index.md @@ -0,0 +1,24 @@ +--- +title: Building a VM +description: After VM_create, VM_build builds the core of the domain (vCPUs, memory) +weight: 20 +--- + +Walk-through documents for the `VM_build` phase: + +```mermaid +flowchart +subgraph xenopsd VM_build[xenopsd VM_build micro#8209;op] +direction LR +VM_build --> VM.build +VM.build --> VM.build_domain +VM.build_domain --> VM.build_domain_exn +VM.build_domain_exn --> Domain.build +click VM_build "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/lib/xenops_server.ml#L2255-L2271" _blank +click VM.build "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L2290-L2291" _blank +click VM.build_domain "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L2250-L2288" _blank +click VM.build_domain_exn "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L2024-L2248" _blank +end +``` + +{{% children description=true %}} diff --git a/doc/content/xenopsd/walkthroughs/VM.build/xenguest.md b/doc/content/xenopsd/walkthroughs/VM.build/xenguest.md new file mode 100644 index 00000000000..66345f018ec --- /dev/null +++ b/doc/content/xenopsd/walkthroughs/VM.build/xenguest.md @@ -0,0 +1,184 @@ +--- +title: xenguest +description: + "Perform building VMs: Allocate and populate the domain's system memory." 
+--- + +# Flowchart + +xenguest is called as part of starting a new domain in VM_build: + +```mermaid +flowchart +subgraph xenopsd VM_build[xenopsd VM_build micro#8209;ops] +direction LR +xenopsd1[Domain.build - Thread #1] --> xenguest1[xenguest #1] +xenopsd2[Domain.build - Thread #2] --> xenguest2[xenguest #2] +xenguest1 --> libxenguest +xenguest2 --> libxenguest2[libxenguest] +click xenopsd1 "../Domain.build/index.html" +click xenopsd2 "../Domain.build/index.html" +click xenguest1 "https://github.com/xenserver/xen.pg/blob/XS-8/patches/xenguest.patch" _blank +click xenguest2 "https://github.com/xenserver/xen.pg/blob/XS-8/patches/xenguest.patch" _blank +click libxenguest "https://github.com/xen-project/xen/tree/master/tools/libs/guest" _blank +click libxenguest2 "https://github.com/xen-project/xen/tree/master/tools/libs/guest" _blank +libxenguest --> Xen[Xen
Hypervisor]
+libxenguest2 --> Xen
+end
+```
+
+# About xenguest
+`xenguest` is called by the xenopsd [Domain.build](Domain.build) function
+to perform the build phase for new VMs, which is part of the `xenopsd`
+[VM.start operation](VM.start).
+
+[xenguest](https://github.com/xenserver/xen.pg/blob/XS-8/patches/xenguest.patch)
+was created as a separate program due to issues with
+`libxenguest`:
+
+- It wasn't threadsafe: fixed, but it still uses a per-call global struct
+- It had an incompatible licence, but now licensed under the LGPL.
+
+Those were fixed, but we still shell out to `xenguest`, which is currently
+carried in the patch queue for the Xen hypervisor packages, but could become
+an individual package once planned changes to the Xen hypercalls are stabilised.
+
+Over time, `xenguest` has evolved to build more of the initial domain state.
+
+# Interface to xenguest
+
+```mermaid
+flowchart
+subgraph xenopsd VM_build[xenopsd VM_build micro#8209;op]
+direction TB
+mode
+domid
+memmax
+Xenstore
+end
+mode[--mode hvm_build] --> xenguest
+domid --> xenguest
+memmax --> xenguest
+Xenstore[Xenstore platform data] --> xenguest
+```
+
+`xenopsd` must pass this information to `xenguest` to build a VM:
+
+- The domain type to build for (HVM, PVH or PV).
+  - It is passed using the command line option `--mode hvm_build`.
+- The `domid` of the created empty domain,
+- The amount of system memory of the domain,
+- A number of other parameters that are domain-specific.
+
+Using the Xenstore, the platform data (vCPUs, vCPU affinity, etc) is passed:
+- the vCPU affinity
+- the vCPU credit2 weight/cap parameters
+- whether the NX bit is exposed
+- whether the viridian CPUID leaf is exposed
+- whether the system has PAE or not
+- whether the system has ACPI or not
+- whether the system has nested HVM or not
+- whether the system has an HPET or not
+
+When called to build a domain, `xenguest` reads those and builds the VM accordingly.
+ +## Walkthrough of the xenguest build mode + +```mermaid +flowchart +subgraph xenguest[xenguest #8209;#8209;mode hvm_build domid] +direction LR +stub_xc_hvm_build[stub_xc_hvm_build#40;#41;] --> get_flags[ + get_flags#40;#41; <#8209; Xenstore platform data +] +stub_xc_hvm_build --> configure_vcpus[ + configure_vcpus#40;#41; #8209;> Xen hypercall +] +stub_xc_hvm_build --> setup_mem[ + setup_mem#40;#41; #8209;> Xen hypercalls to setup domain memory +] +end +``` + +Based on the given domain type, the `xenguest` program calls dedicated +functions for the build process of the given domain type. + +These are: + +- `stub_xc_hvm_build()` for HVM, +- `stub_xc_pvh_build()` for PVH, and +- `stub_xc_pv_build()` for PV domains. + +These domain build functions call these functions: + +1. `get_flags()` to get the platform data from the Xenstore +2. `configure_vcpus()` which uses the platform data from the Xenstore to configure vCPU affinity and the credit scheduler parameters vCPU weight and vCPU cap (max % pCPU time for throttling) +3. The `setup_mem` function for the given VM type. + +## The function hvm_build_setup_mem() + +For HVM domains, `hvm_build_setup_mem()` is responsible for deriving the memory +layout of the new domain, allocating the required memory and populating for the +new domain. It must: + +1. Derive the `e820` memory layout of the system memory of the domain + including memory holes depending on PCI passthrough and vGPU flags. +2. Load the BIOS/UEFI firmware images +3. Store the final MMIO hole parameters in the Xenstore +4. Call the `libxenguest` function `xc_dom_boot_mem_init()` (see below) +5. 
Call `construct_cpuid_policy()` to apply the CPUID `featureset` policy
+
+## The function xc_dom_boot_mem_init()
+
+```mermaid
+flowchart LR
+subgraph xenguest
+hvm_build_setup_mem[hvm_build_setup_mem#40;#41;]
+end
+subgraph libxenguest
+hvm_build_setup_mem --> xc_dom_boot_mem_init[xc_dom_boot_mem_init#40;#41;]
+xc_dom_boot_mem_init -->|vmemranges| meminit_hvm[meminit_hvm#40;#41;]
+click xc_dom_boot_mem_init "https://github.com/xen-project/xen/blob/39c45c/tools/libs/guest/xg_dom_boot.c#L110-L126" _blank
+click meminit_hvm "https://github.com/xen-project/xen/blob/39c45c/tools/libs/guest/xg_dom_x86.c#L1348-L1648" _blank
+end
+```
+
+`hvm_build_setup_mem()` calls
+[xc_dom_boot_mem_init()](https://github.com/xen-project/xen/blob/39c45c/tools/libs/guest/xg_dom_boot.c#L110-L126)
+to allocate and populate the domain's system memory.
+
+It calls
+[meminit_hvm()](https://github.com/xen-project/xen/blob/39c45c/tools/libs/guest/xg_dom_x86.c#L1348-L1648)
+to loop over the `vmemranges` of the domain for mapping the system RAM
+of the guest from the Xen hypervisor heap. Its goals are:
+
+- Attempt to allocate 1GB superpages when possible
+- Fall back to 2MB pages when 1GB allocation failed
+- Fall back to 4k pages when both failed
+
+It uses the hypercall
+[XENMEM_populate_physmap](https://github.com/xen-project/xen/blob/39c45c/xen/common/memory.c#L1408-L1477)
+to perform memory allocation and to map the allocated memory
+to the system RAM ranges of the domain.
+
+https://github.com/xen-project/xen/blob/39c45c/xen/common/memory.c#L1022-L1071
+
+`XENMEM_populate_physmap`:
+
+1. Uses
+   [construct_memop_from_reservation](https://github.com/xen-project/xen/blob/39c45c/xen/common/memory.c#L1022-L1071)
+   to convert the arguments for allocating a page from
+   [struct xen_memory_reservation](https://github.com/xen-project/xen/blob/master/xen/include/public/memory.h#L46-L80)
+   to `struct memop_args`.
+2. Sets flags and calls functions according to the arguments
+3. 
Allocates the requested page at the most suitable place + - depending on passed flags, allocate on a specific NUMA node + - else, if the domain has node affinity, on the affine nodes + - also in the most suitable memory zone within the NUMA node +4. Falls back to less desirable places if this fails + - or fail for "exact" allocation requests +5. When no pages of the requested size are free, + it splits larger superpages into pages of the requested size. + +For more details on the VM build step involving `xenguest` and Xen side see: +https://wiki.xenproject.org/wiki/Walkthrough:_VM_build_using_xenguest diff --git a/doc/content/xenopsd/walkthroughs/VM.start.md b/doc/content/xenopsd/walkthroughs/VM.start.md index 52201fd7218..e28f98e6451 100644 --- a/doc/content/xenopsd/walkthroughs/VM.start.md +++ b/doc/content/xenopsd/walkthroughs/VM.start.md @@ -1,5 +1,7 @@ --- title: 'Walkthrough: Starting a VM' +linktitle: 'Starting a VM' +weight: 10 --- A Xenopsd client wishes to start a VM. They must first tell Xenopsd the VM @@ -89,7 +91,7 @@ exist for: From here we shall assume the use of the "Xen via libxc, libxenguest and xenstore" (a.k.a. "Xenopsd classic") backend. -The backend [VM.add](https://github.com/xapi-project/xenopsd/blob/2a476c132c0b5732f9b224316b851a1b4d57520b/xc/xenops_server_xen.ml#L719) +The backend [VM.add](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L1603-L1659) function checks whether the VM we have to manage already exists -- and if it does then it ensures the Xenstore configuration is intact. This Xenstore configuration is important because at any time a client can query the state of a VM with @@ -196,24 +198,43 @@ takes care of: Once a thread from the worker pool becomes free, it will execute the "do it now" function. In the example above this is `perform op t` where `op` is `VM_start vm` and `t` is the Task. 
The function -[perform](https://github.com/xapi-project/xenopsd/blob/524d57b3c70/lib/xenops_server.ml#L1198) +[perform_exn](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/lib/xenops_server.ml#L2533) has fragments like this: ```ocaml - | VM_start id -> - debug "VM.start %s" id; - perform_atomics (atomics_of_operation op) t; - VM_DB.signal id + | VM_start (id, force) -> ( + debug "VM.start %s (force=%b)" id force ; + let power = (B.VM.get_state (VM_DB.read_exn id)).Vm.power_state in + match power with + | Running -> + info "VM %s is already running" id + | _ -> + perform_atomics (atomics_of_operation op) t ; + VM_DB.signal id "^^^^^^^^^^^^^^^^^^^^-------- + ) ``` Each "operation" (e.g. `VM_start vm`) is decomposed into "micro-ops" by the function -[atomics_of_operation](https://github.com/xapi-project/xenopsd/blob/524d57b3c70/lib/xenops_server.ml#L739) +[atomics_of_operation](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/lib/xenops_server.ml#L1583) where the micro-ops are small building-block actions common to the higher-level operations. Each operation corresponds to a list of "micro-ops", where there is no if/then/else. Some of the "micro-ops" may be a no-op depending on the VM configuration (for example a PV domain may not need a qemu). In the case of -`VM_start vm` this decomposes into the sequence: +[`VM_start vm`](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/lib/xenops_server.ml#L1584) +the `Xenopsd` server starts by calling the [functions that +decompose](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/lib/xenops_server.ml#L1612-L1714) + the `VM_hook_script`, `VM_create` and `VM_build` micro-ops: +```ml + dequarantine_ops vgpus + ; [ + VM_hook_script + (id, Xenops_hooks.VM_pre_start, Xenops_hooks.reason__none) + ; VM_create (id, None, None, no_sharept) + ; VM_build (id, force) + ] +``` +This is the complete sequence of micro-ops: ## 1. 
run the "VM_pre_start" scripts @@ -259,105 +280,10 @@ function must ## 3. build the domain -On Xen, `Xenctrl.domain_create` creates an empty domain and -returns the domain ID (`domid`) of the new domain to `xenopsd`. - -In the `build` phase, the `xenguest` program is called to create -the system memory layout of the domain, set vCPU affinity and a -lot more. - -The function -[VM.build_domain_exn](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L2024) -must - -1. run pygrub (or eliloader) to extract the kernel and initrd, if necessary -2. invoke the *xenguest* binary to interact with libxenguest. -3. apply the `cpuid` configuration -4. store the current domain configuration on disk -- it's important to know - the difference between the configuration you started with and the configuration - you would use after a reboot because some properties (such as maximum memory - and vCPUs) as fixed on create. - -### 3.1 Interface to xenguest for building domains - -[xenguest](https://github.com/xenserver/xen.pg/blob/XS-8/patches/xenguest.patch) -was originally created as a separate program due to issues with -`libxenguest` that were fixed, but we still shell out to `xenguest`: - -- Wasn't threadsafe: fixed, but it still uses a per-call global struct -- Incompatible licence, but now licensed under the LGPL. - -The `xenguest` binary has evolved to build more of the initial -domain state. 
`xenopsd` passes it: - -- The domain type to build for (HVM, PHV or PV), -- The `domid` of the created empty domain, -- The amount of system memory of the domain, -- The platform data (vCPUs, vCPU affinity, etc) using the Xenstore: - - the vCPU affinity - - the vCPU credit2 weight/cap parameters - - whether the NX bit is exposed - - whether the viridian CPUID leaf is exposed - - whether the system has PAE or not - - whether the system has ACPI or not - - whether the system has nested HVM or not - - whether the system has an HPET or not - -When called to build a domain, `xenguest` reads those and builds the VM accordingly. - -### 3.2 Workflow for allocating and populating domain memory - -Based on the given type, the `xenguest` program calls dedicated -functions for the build process of given domain type. - -- For HVM, this function is `stub_xc_hvm_build()`. - -These domain build functions call these functions: - -1. `get_flags()` to get the platform data from the Xenstore -2. `configure_vcpus()` which uses the platform data from the Xenstore to configure vCPU affinity and the credit scheduler parameters vCPU weight and vCPU cap (max % pCPU time for throttling) -3. For HVM, `hvm_build_setup_mem` to: - 1. Decide the `e820` memory layout of the system memory of the domain - including memory holes depending on PCI passthrough and vGPU flags. - 2. Load the BIOS/UEFI firmware images - 3. Store the final MMIO hole parameters in the Xenstore - 4. Call the `libxenguest` function - [xc_dom_boot_mem_init()](https://github.com/xen-project/xen/blob/39c45caef271bc2b2e299217450cfda24c0c772a/tools/libs/guest/xg_dom_boot.c#L110-L126) - to allocate and map the domain's system memory. - For HVM domains, it calls - [meminit_hvm()](https://github.com/xen-project/xen/blob/39c45caef271bc2b2e299217450cfda24c0c772a/tools/libs/guest/xg_dom_x86.c#L1348-L1648) - to loop over the `vmemranges` of the domain for mapping the system RAM - of the guest from the Xen hypervisor heap. 
Its goals are: - - - Attempt to allocate 1GB superpages when possible - - Fall back to 2MB pages when 1GB allocation failed - - Fall back to 4k pages when both failed - - It uses the hypercall - [XENMEM_populate_physmap()]( - https://github.com/xen-project/xen/blob/39c45caef271bc2b2e299217450cfda24c0c772a/xen/common/memory.c#L1408-L1477) - to perform memory allocation and to map the allocated memory - to the system RAM ranges of the domain. - The hypercall must: - - 1. convert the arguments for allocating a page to hypervisor structures - 2. set flags and calls functions according to the arguments - 3. allocate the requested page at the most suitable place - - - depending on passed flags, allocate on a specific NUMA node - - else, if the domain has node affinity, on the affine nodes - - also in the most suitable memory zone within the NUMA node - - 4. fall back to less desirable places if this fails - - - or fail for "exact" allocation requests - - 5. split superpages if pages of the requested size are not available - - 5. Call `construct_cpuid_policy()` to apply the `CPUID` `featureset` policy - - For more details on the VM build step involving xenguest and Xen side see: - https://wiki.xenproject.org/wiki/Walkthrough:_VM_build_using_xenguest +The `build` phase waits, if necessary, for the Xen memory scrubber to catch +up reclaiming memory, runs NUMA placement, sets vCPU affinity and invokes +the `xenguest` to build the system memory layout of the domain. +See the [walk-through of the VM_build μ-op](VM.build) for details. ## 4. 
mark each VBD as "active" From 27de477595e3666ad7e8deb6bb4d33372b62713e Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Wed, 12 Feb 2025 12:00:00 +0100 Subject: [PATCH 038/117] xenopsd docs: Add Walk-through descriptions, show them on the index page Signed-off-by: Bernhard Kaindl --- doc/content/xenopsd/walkthroughs/VM.migrate.md | 3 +++ doc/content/xenopsd/walkthroughs/VM.start.md | 1 + doc/content/xenopsd/walkthroughs/_index.md | 2 +- doc/content/xenopsd/walkthroughs/live-migration.md | 1 + 4 files changed, 6 insertions(+), 1 deletion(-) diff --git a/doc/content/xenopsd/walkthroughs/VM.migrate.md b/doc/content/xenopsd/walkthroughs/VM.migrate.md index 080ebdb8edc..e3517ab3f0f 100644 --- a/doc/content/xenopsd/walkthroughs/VM.migrate.md +++ b/doc/content/xenopsd/walkthroughs/VM.migrate.md @@ -1,5 +1,8 @@ --- title: 'Walkthrough: Migrating a VM' +linktitle: 'Migrating a VM' +description: Walkthrough of migrating a VM from one host to another. +weight: 50 --- A XenAPI client wishes to migrate a VM from one host to another within diff --git a/doc/content/xenopsd/walkthroughs/VM.start.md b/doc/content/xenopsd/walkthroughs/VM.start.md index e28f98e6451..6a12e9d9c60 100644 --- a/doc/content/xenopsd/walkthroughs/VM.start.md +++ b/doc/content/xenopsd/walkthroughs/VM.start.md @@ -1,6 +1,7 @@ --- title: 'Walkthrough: Starting a VM' linktitle: 'Starting a VM' +description: Complete walkthrough of starting a VM, from receiving the request to unpause. weight: 10 --- diff --git a/doc/content/xenopsd/walkthroughs/_index.md b/doc/content/xenopsd/walkthroughs/_index.md index 2217b209ab6..6fe3f551f29 100644 --- a/doc/content/xenopsd/walkthroughs/_index.md +++ b/doc/content/xenopsd/walkthroughs/_index.md @@ -6,7 +6,7 @@ linkTitle = "Walk-throughs" Let's trace through interesting operations to see how the whole system works. 
-{{% children %}} +{{% children depth=2 description=true %}} Inspiration for other walk-throughs: diff --git a/doc/content/xenopsd/walkthroughs/live-migration.md b/doc/content/xenopsd/walkthroughs/live-migration.md index f0af797f85e..c6fa02d95fa 100644 --- a/doc/content/xenopsd/walkthroughs/live-migration.md +++ b/doc/content/xenopsd/walkthroughs/live-migration.md @@ -1,6 +1,7 @@ +++ title = "Live Migration Sequence Diagram" linkTitle = "Live Migration" +description = "Sequence diagram of the process of Live Migration." +++ {{}} From 6617803ea92a322a5e6937f89c8054fcfc06706b Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Wed, 8 Jan 2025 08:23:37 +0000 Subject: [PATCH 039/117] python3: Resurrect metrics.py helper script This script was previously deleted in #3949, update it to Python 3 and the new API methods (VIF.get_metrics and the like were removed years ago), get rid of globals and remove the sanitycheck library usage. Install it alongside other libexec python scripts. The script prints out RRD metrics in the following format: ``` $ ./metrics.py ------------ Host Metrics ------------ name_label : 'lcy2-dt29' metrics : {'last_updated': , 'live': True, 'memory_free': '22100557824', 'memory_total': '34172760064', 'other_config': {}, 'uuid': '14cbb5db-63d8-f631-7179-6011cc578305'} host_cpus : [{'family': '6', 'features': '', 'flags': 'fpu de tsc msr pae mce cx8 apic sep mca cmov pat clflush acpi mmx ' 'fxsr sse sse2 ss ht syscall nx rdtscp lm constant_tsc rep_good ' 'nopl nonstop_tsc cpuid pni pclmulqdq monitor est ssse3 fma cx16 ' 'sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand hypervisor ' 'lahf_lm abm 3dnowprefetch cpuid_fault ssbd ibrs ibpb stibp ' 'fsgsbase bmi1 avx2 bmi2 erms rdseed adx clflushopt xsaveopt xsavec ' 'xgetbv1 arch_capabilities', 'host': 'OpaqueRef:5cdd5f91-b5e2-ba4f-09df-0f4ef9b77ef4', 'model': '158', 'modelname': 'Intel(R) Xeon(R) CPU E3-1230 v6 @ 3.50GHz', 'number': '7', 'other_config': {}, 'speed': '3503', 'stepping': '9', 
'utilisation': 0.0, 'uuid': '99af3b01-15c3-a40b-16bb-2f5f47bf2528', 'vendor': 'GenuineIntel'}, ... ------------ ----------------------- Virtual Machine Metrics ----------------------- name_label : 'CentOS Stream 9 (1)' metrics : {'VCPUs_CPU': {}, 'VCPUs_flags': {}, 'VCPUs_number': '1', 'VCPUs_params': {}, 'VCPUs_utilisation': {'0': 0.0}, 'current_domain_type': 'hvm', 'hvm': True, 'install_time': , 'last_updated': , 'memory_actual': '4294955008', 'nested_virt': False, 'nomigrate': False, 'other_config': {}, 'start_time': , 'state': [], 'uuid': '40cb2d92-f633-382e-38a5-276cae495834'} guest_metrics : {'PV_drivers_detected': True, 'PV_drivers_up_to_date': True, 'PV_drivers_version': {}, 'can_use_hotplug_vbd': 'unspecified', 'can_use_hotplug_vif': 'unspecified', 'disks': {}, 'last_updated': , 'live': True, 'memory': {}, 'netbios_name': {}, 'networks': {}, 'os_version': {}, 'other': {'feature-poweroff': '1', 'feature-reboot': '1', 'feature-suspend': '1', 'feature-vcpu-hotplug': '1', 'has-vendor-device': '0', 'platform-feature-multiprocessor-suspend': '1', 'platform-feature-xs_reset_watches': '1'}, 'other_config': {}, 'uuid': '6829a282-7f29-1749-d148-eeda54bbecd4'} -- name_label : 'Control domain on host: lcy2-dt29' metrics : {'VCPUs_CPU': {}, 'VCPUs_flags': {}, 'VCPUs_number': '8', 'VCPUs_params': {}, 'VCPUs_utilisation': {'1': 0.0, '2': 0.0, '3': 0.0, '4': 0.0, '5': 0.0, '6': 0.0, '7': 0.0, '8': 0.0}, 'current_domain_type': 'pv', 'hvm': False, 'install_time': , 'last_updated': , 'memory_actual': '2785017856', 'nested_virt': False, 'nomigrate': False, 'other_config': {}, 'start_time': , 'state': [], 'uuid': 'f7385e35-f114-c165-d66c-e9b01ec47ce9'} guest_metrics : 'NULL' -- name_label : 'Windows 10 (64-bit) (1)' metrics : {'VCPUs_CPU': {}, 'VCPUs_flags': {}, 'VCPUs_number': '2', 'VCPUs_params': {}, 'VCPUs_utilisation': {'0': 0.0}, 'current_domain_type': 'hvm', 'hvm': True, 'install_time': , 'last_updated': , 'memory_actual': '4297043968', 'nested_virt': False, 'nomigrate': 
False, 'other_config': {}, 'start_time': , 'state': [], 'uuid': '548a4d19-5db3-bce7-1a18-ab9affc2e9ee'} guest_metrics : {'PV_drivers_detected': True, 'PV_drivers_up_to_date': True, 'PV_drivers_version': {}, 'can_use_hotplug_vbd': 'yes', 'can_use_hotplug_vif': 'yes', 'disks': {}, 'last_updated': , 'live': True, 'memory': {}, 'netbios_name': {}, 'networks': {'0/ip': '10.71.58.127', '0/ipv4/0': '10.71.58.127', '0/ipv6/0': 'fe80:0000:0000:0000:70ad:88ff:febb:643d'}, 'os_version': {}, 'other': {'data-cant-suspend-reason': '{"error":{"class":"GenericError","desc":"State ' 'blocked by non-migratable device ' '\'0000:00:07.0/nvme\'","data":{}},"id":"qmp-000013-4"}', 'feature-balloon': '1', 'feature-laptop-slate-mode': '1', 'feature-poweroff': '1', 'feature-reboot': '1', 'feature-s3': '1', 'feature-s4': '1', 'feature-shutdown': '1', 'has-vendor-device': '1', 'platform-feature-multiprocessor-suspend': '1', 'platform-feature-xs_reset_watches': '1'}, 'other_config': {}, 'uuid': 'dcd51a5f-3017-a7ec-3d35-7648366b887e'} ----------------------- ----------- VIF metrics ----------- name_label : 'VIF connecting "NPRI bond of 0 1" to "CentOS Stream 9 (1)"' metrics : {'vif_0_rx': 0.0, 'vif_0_rx_errors': 0.0, 'vif_0_tx': 0.0, 'vif_0_tx_errors': 0.0} -- name_label : 'VIF connecting "NPRI bond of 0 1" to "Windows 10 (64-bit) (1)"' metrics : {'vif_0_rx': 0.0, 'vif_0_rx_errors': 0.0, 'vif_0_tx': 0.0, 'vif_0_tx_errors': 0.0} ----------- ----------- VBD Metrics ----------- name_label : ('VBD connecting "CentOS Stream 9 (1)" to ' '"CentOS-Stream-8-x86_64-latest-boot.iso"') metrics : {'vbd_xvdd_avgqu_sz': 0.0, 'vbd_xvdd_inflight': 0.0, 'vbd_xvdd_io_throughput_read': 0.0, 'vbd_xvdd_io_throughput_total': 0.0, 'vbd_xvdd_io_throughput_write': 0.0, 'vbd_xvdd_iops_read': 0.0, 'vbd_xvdd_iops_total': 0.0, 'vbd_xvdd_iops_write': 0.0, 'vbd_xvdd_iowait': 0.0, 'vbd_xvdd_latency': 0.0, 'vbd_xvdd_read': 0.0, 'vbd_xvdd_read_latency': 0.0, 'vbd_xvdd_write': 0.0, 'vbd_xvdd_write_latency': 0.0} -- name_label : 
'VBD connecting "Windows 10 (64-bit) (1)" to "win10-x64_uefi.iso"' metrics : {'vbd_xvdd_avgqu_sz': 0.0, 'vbd_xvdd_inflight': 0.0, 'vbd_xvdd_io_throughput_read': 0.0, 'vbd_xvdd_io_throughput_total': 0.0, 'vbd_xvdd_io_throughput_write': 0.0, 'vbd_xvdd_iops_read': 0.0, 'vbd_xvdd_iops_total': 0.0, 'vbd_xvdd_iops_write': 0.0, 'vbd_xvdd_iowait': 0.0, 'vbd_xvdd_latency': 0.0, 'vbd_xvdd_read': 0.0, 'vbd_xvdd_read_latency': 0.0, 'vbd_xvdd_write': 0.0, 'vbd_xvdd_write_latency': 0.0} -- name_label : 'VBD connecting "Windows 10 (64-bit) (1)" to "Windows 10 (64-bit) (1) 0"' metrics : {'vbd_xvda_avgqu_sz': 0.0, 'vbd_xvda_inflight': 0.0, 'vbd_xvda_io_throughput_read': 0.0, 'vbd_xvda_io_throughput_total': 0.0, 'vbd_xvda_io_throughput_write': 0.0, 'vbd_xvda_iops_read': 0.0, 'vbd_xvda_iops_total': 0.0, 'vbd_xvda_iops_write': 0.0, 'vbd_xvda_iowait': 0.0, 'vbd_xvda_latency': 0.0, 'vbd_xvda_read': 0.0, 'vbd_xvda_read_latency': 0.0, 'vbd_xvda_write': 0.0, 'vbd_xvda_write_latency': 0.0} -- name_label : 'VBD connecting "CentOS Stream 9 (1)" to "CentOS Stream 9 (1) 0"' metrics : {'vbd_xvda_avgqu_sz': 0.0, 'vbd_xvda_inflight': 0.0, 'vbd_xvda_io_throughput_read': 0.0, 'vbd_xvda_io_throughput_total': 0.0, 'vbd_xvda_io_throughput_write': 0.0, 'vbd_xvda_iops_read': 0.0, 'vbd_xvda_iops_total': 0.0, 'vbd_xvda_iops_write': 0.0, 'vbd_xvda_iowait': 0.0, 'vbd_xvda_latency': 0.0, 'vbd_xvda_read': 0.0, 'vbd_xvda_read_latency': 0.0, 'vbd_xvda_write': 0.0, 'vbd_xvda_write_latency': 0.0} ----------- --------------- Network Metrics --------------- name_label : 'NPRI bond of 0 1' --------------- ----------- PIF Metrics ----------- name_label : 'bond0 on lcy2-dt29' metrics : {'pif_bond0_rx': 0.0, 'pif_bond0_rx_errors': 0.0, 'pif_bond0_tx': 0.0, 'pif_bond0_tx_errors': 0.0} ----------- Active Objects host_metrics ['lcy2-dt29'] vm_metrics ['CentOS Stream 9 (1)', 'Control domain on host: lcy2-dt29', 'Windows 10 (64-bit) (1)'] vif_metrics ['VIF connecting "NPRI bond of 0 1" to "CentOS Stream 9 (1)"', 'VIF 
connecting "NPRI bond of 0 1" to "Windows 10 (64-bit) (1)"'] vbd_metrics ['VBD connecting "CentOS Stream 9 (1)" to "CentOS-Stream-8-x86_64-latest-boot.iso"', 'VBD connecting "Windows 10 (64-bit) (1)" to "win10-x64_uefi.iso"', 'VBD connecting "Windows 10 (64-bit) (1)" to "Windows 10 (64-bit) (1) 0"', 'VBD connecting "CentOS Stream 9 (1)" to "CentOS Stream 9 (1) 0"'] network_metrics ['NPRI bond of 0 1'] pif_metrics ['bond0 on lcy2-dt29'] ``` Signed-off-by: Andrii Sultanov --- python3/Makefile | 1 + python3/libexec/metrics.py | 185 +++++++++++++++++++++++++++++++++++++ 2 files changed, 186 insertions(+) create mode 100644 python3/libexec/metrics.py diff --git a/python3/Makefile b/python3/Makefile index fed125c01bb..876b1b8ff49 100644 --- a/python3/Makefile +++ b/python3/Makefile @@ -23,6 +23,7 @@ install: $(IDATA) dnf_plugins/accesstoken.py $(DESTDIR)$(SITE3_DIR)/$(DNF_PLUGIN_DIR)/ $(IDATA) dnf_plugins/ptoken.py $(DESTDIR)$(SITE3_DIR)/$(DNF_PLUGIN_DIR)/ + $(IPROG) libexec/metrics.py $(DESTDIR)$(OPTDIR)/debug $(IPROG) libexec/host-display $(DESTDIR)$(LIBEXECDIR) $(IPROG) libexec/link-vms-by-sr.py $(DESTDIR)$(LIBEXECDIR) $(IPROG) libexec/usb_reset.py $(DESTDIR)$(LIBEXECDIR) diff --git a/python3/libexec/metrics.py b/python3/libexec/metrics.py new file mode 100644 index 00000000000..209769515f0 --- /dev/null +++ b/python3/libexec/metrics.py @@ -0,0 +1,185 @@ +#!/usr/bin/python3 + +import atexit +import contextlib +from pprint import pprint, pformat + +import XenAPI + + +# given a list of dictionaries, print selected keys in order from each one, nicely formatted with a title +def dictionary_list_partial_print(title, dictionary_list, keys): + bar = "-" * len(title) + print(bar, "\n", title, "\n", bar) + print( + "\n--\n".join( + [ + "\n".join(["%s : %s" % (k, pformat(d[k])) for k in keys]) + for d in dictionary_list + ] + ) + ) + print(bar) + + +# x, 'VM', 'guest_metrics' -> guest_metrics_record of the VM x +# catch the NULL if the record doesn't exist for some reason, and 
return the string 'NULL'
+def fetch_metrics_record(sx, object_reference, type_string, metrics_name):
+    record_reference = sx.__getattr__(type_string).__getattr__("get_" + metrics_name)(
+        object_reference
+    )
+    if record_reference == "OpaqueRef:NULL":
+        return "NULL"
+    else:
+        return sx.__getattr__(f"{type_string}_{metrics_name}").get_record(
+            record_reference
+        )
+
+
+def fetch_rrd_records(sx, object_reference, type_string, data_owner):
+    obj_class = sx.__getattr__(type_string)
+    owner_class = sx.__getattr__(data_owner)
+    belongs_to = obj_class.__getattr__(f"get_{data_owner}")(object_reference)
+    device_number = obj_class.__getattr__("get_device")(object_reference)
+    related_data_sources = [
+        x
+        for x in owner_class.get_data_sources(belongs_to)
+        if x["name_label"].startswith(f"{type_string.lower()}_{device_number}")
+    ]
+    related_data_sources = {x["name_label"]: x["value"] for x in related_data_sources}
+    return related_data_sources
+
+
+# the names of the vbds are a little more complicated, because there is the possibility that a VBD connects
+# a VM to a CD drive, which may be empty, and thus not have a VDI to represent it. 
+def get_vbd_name(sx, vbd): + if sx.VBD.get_type(vbd) == "CD" and sx.VBD.get_empty(vbd) == True: + device_name = "empty cd drive" + else: + device_name = sx.VDI.get_name_label(sx.VBD.get_VDI(vbd)) + return f'VBD connecting "{sx.VM.get_name_label(sx.VBD.get_VM(vbd))}" to "{device_name}"' + + +def main(): + session = XenAPI.xapi_local() + + def logout(): + with contextlib.suppress(Exception): + session.xenapi.session.logout() + + atexit.register(logout) + + session.xenapi.login_with_password("", "", "1.0", "metrics-script") + sx = session.xenapi + + # first, we'll find all the hosts, and get the information we care about from each + hosts = sx.host.get_all() + host_metrics = [ + { + "name_label": sx.host.get_name_label(x), + "metrics": sx.host_metrics.get_record(sx.host.get_metrics(x)), + "host_cpus": [sx.host_cpu.get_record(x) for x in sx.host.get_host_CPUs(x)], + } + for x in hosts + ] + + # and print out the interesting bits + dictionary_list_partial_print( + "Host Metrics", host_metrics, ["name_label", "metrics", "host_cpus"] + ) + + # find all the virtual machines which are resident on the hosts + resident_vms = set() + for host in hosts: + resident_vms.update(sx.host.get_resident_VMs(host)) + + # get and print their info + vm_metrics = [ + { + "name_label": sx.VM.get_name_label(x), + "metrics": fetch_metrics_record(sx, x, "VM", "metrics"), + "guest_metrics": fetch_metrics_record(sx, x, "VM", "guest_metrics"), + } + for x in resident_vms + ] + + dictionary_list_partial_print( + "Virtual Machine Metrics", + vm_metrics, + ["name_label", "metrics", "guest_metrics"], + ) + + # from the list of resident VMs we can find all the active VIFs and VBDs + # however these don't have useful names, so we have to make them up + active_vifs = [ + vif for vif in sx.VIF.get_all() if sx.VIF.get_VM(vif) in resident_vms + ] + + vif_metrics = [ + { + "name_label": f'VIF connecting "{sx.network.get_name_label(sx.VIF.get_network(x))}" ' + f'to 
"{sx.VM.get_name_label(sx.VIF.get_VM(x))}"', + "metrics": fetch_rrd_records(sx, x, "VIF", "VM"), + } + for x in active_vifs + ] + + dictionary_list_partial_print("VIF metrics", vif_metrics, ["name_label", "metrics"]) + + active_vbds = [ + vbd for vbd in sx.VBD.get_all() if sx.VBD.get_VM(vbd) in resident_vms + ] + + vbd_metrics = [ + { + "name_label": get_vbd_name(sx, x), + "metrics": fetch_rrd_records(sx, x, "VBD", "VM"), + } + for x in active_vbds + ] + + dictionary_list_partial_print("VBD Metrics", vbd_metrics, ["name_label", "metrics"]) + + # from the VIFs we can find the active networks, which don't actually have any metrics + active_networks = set() + for vif in active_vifs: + active_networks.add(sx.VIF.get_network(vif)) + + network_metrics = [ + {"name_label": sx.network.get_name_label(x)} for x in active_networks + ] + dictionary_list_partial_print("Network Metrics", network_metrics, ["name_label"]) + + # and from the active networks we can get all the relevant pifs + active_pifs = set() + for network in active_networks: + active_pifs.update(sx.network.get_PIFs(network)) + + pif_metrics = [ + { + "name_label": f"{sx.PIF.get_device(x)} on " + f"{sx.host.get_name_label(sx.PIF.get_host(x))}", + "metrics": fetch_rrd_records(sx, x, "PIF", "host"), + } + for x in active_pifs + ] + + dictionary_list_partial_print("PIF Metrics", pif_metrics, ["name_label", "metrics"]) + + # finish off by printing out a concise list of all the active objects + # awkward duplication instead of iterating over locals()[name] is so that + # pytype does not complain + print("Active Objects") + for name, lst in [ + ("host_metrics", host_metrics), + ("vm_metrics", vm_metrics), + ("vif_metrics", vif_metrics), + ("vbd_metrics", vbd_metrics), + ("network_metrics", network_metrics), + ("pif_metrics", pif_metrics), + ]: + print(name, [(y["name_label"]) for y in lst]) + + +if __name__ == "__main__": + main() From 7269b098cf8663373c0cf54fa6a736de85f9d142 Mon Sep 17 00:00:00 2001 From: Andrii 
Sultanov Date: Wed, 8 Jan 2025 16:12:44 +0000 Subject: [PATCH 040/117] python3: Resurrect a metricsgraph.py helper script This script was previously deleted in #3949, update it to Python 3 and the new API methods (VIF.get_metrics and the like were removed years ago), get rid of globals and remove the sanitycheck library usage. Install it alongside other libexec python scripts. The script outputs a short list of active objects and their graph in the graphviz format (it could be turned into an image with the help of various graphviz utilities like `echo digraph active_objects {...} | dot -Tsvg > output.svg`) ``` $ ./metricsgraph.py /* hosts : lcy2-dt29 */ /* resident VMs : CentOS Stream 9 (1), Control domain on host: lcy2-dt29, Windows 10 (64-bit) (1) */ /* active VIFs : OpaqueRef:051f732b-4897-17ab-1a03-9ab2e90a6d8e, OpaqueRef:b7291919-2f60-83a0-7777-96ffd6a73e32 */ /* active VBDs : OpaqueRef:21f94d40-1be3-1dee-d394-7c18eaaad8b4, OpaqueRef:44984583-9938-1177-e7e5-dbc4bd23f51e, OpaqueRef:6de310eb-2b0b-85c7-cd5b-d3ccbe46f3d8, OpaqueRef:543537d3-ec19-b67d-7422-545894cf6727 */ /* active networks : NPRI bond of 0 1 */ /* active PIFs : OpaqueRef:4d293e68-1c2a-c6a8-bdd9-823abca394ba */ digraph active_objects { node [shape="rect"]; "OpaqueRef:5cdd5f91-b5e2-ba4f-09df-0f4ef9b77ef4" [label="lcy2-dt29\ncpus=0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00\nmemory=0.65"]; "OpaqueRef:1d340059-3a7b-bac6-9d90-26e87ecc4a2a" [label="CentOS Stream 9 (1)\ncpus=0.00\nmemory=4095.98828125M"]; "OpaqueRef:f76036a2-17d8-c6fa-7959-1e5b10c50d3b" [label="Control domain on host: lcy2-dt29\ncpus=0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00\nmemory=2656.0M"]; "OpaqueRef:c613d702-b8d7-2313-34b5-2683accae3ed" [label="Windows 10 (64-bit) (1)\ncpus=0.00\nmemory=4097.98046875M"]; "OpaqueRef:051f732b-4897-17ab-1a03-9ab2e90a6d8e" [label="vif\nread=0.00k\nwrite=0.00k"]; "OpaqueRef:b7291919-2f60-83a0-7777-96ffd6a73e32" [label="vif\nread=0.00k\nwrite=0.00k"]; "OpaqueRef:21f94d40-1be3-1dee-d394-7c18eaaad8b4" 
[label="vbd\nread=0.00k\nwrite=0.00k"]; "OpaqueRef:44984583-9938-1177-e7e5-dbc4bd23f51e" [label="vbd\nread=0.00k\nwrite=0.00k"]; "OpaqueRef:6de310eb-2b0b-85c7-cd5b-d3ccbe46f3d8" [label="vbd\nread=0.00k\nwrite=0.00k"]; "OpaqueRef:543537d3-ec19-b67d-7422-545894cf6727" [label="vbd\nread=0.00k\nwrite=0.00k"]; "OpaqueRef:4d293e68-1c2a-c6a8-bdd9-823abca394ba" [label="pif\nread=0.00k\nwrite=0.00k"]; "OpaqueRef:cf42e13c-aa9d-c604-710c-bead3587948a" [label="NPRI bond of 0 1"]; "OpaqueRef:5cdd5f91-b5e2-ba4f-09df-0f4ef9b77ef4" -> "OpaqueRef:1d340059-3a7b-bac6-9d90-26e87ecc4a2a"; "OpaqueRef:5cdd5f91-b5e2-ba4f-09df-0f4ef9b77ef4" -> "OpaqueRef:c613d702-b8d7-2313-34b5-2683accae3ed"; "OpaqueRef:5cdd5f91-b5e2-ba4f-09df-0f4ef9b77ef4" -> "OpaqueRef:f76036a2-17d8-c6fa-7959-1e5b10c50d3b"; "OpaqueRef:1d340059-3a7b-bac6-9d90-26e87ecc4a2a" -> "OpaqueRef:b7291919-2f60-83a0-7777-96ffd6a73e32"; "OpaqueRef:c613d702-b8d7-2313-34b5-2683accae3ed" -> "OpaqueRef:051f732b-4897-17ab-1a03-9ab2e90a6d8e"; "OpaqueRef:1d340059-3a7b-bac6-9d90-26e87ecc4a2a" -> "OpaqueRef:21f94d40-1be3-1dee-d394-7c18eaaad8b4"; "OpaqueRef:1d340059-3a7b-bac6-9d90-26e87ecc4a2a" -> "OpaqueRef:6de310eb-2b0b-85c7-cd5b-d3ccbe46f3d8"; "OpaqueRef:c613d702-b8d7-2313-34b5-2683accae3ed" -> "OpaqueRef:44984583-9938-1177-e7e5-dbc4bd23f51e"; "OpaqueRef:c613d702-b8d7-2313-34b5-2683accae3ed" -> "OpaqueRef:543537d3-ec19-b67d-7422-545894cf6727"; "OpaqueRef:051f732b-4897-17ab-1a03-9ab2e90a6d8e" -> "OpaqueRef:cf42e13c-aa9d-c604-710c-bead3587948a"; "OpaqueRef:b7291919-2f60-83a0-7777-96ffd6a73e32" -> "OpaqueRef:cf42e13c-aa9d-c604-710c-bead3587948a"; "OpaqueRef:cf42e13c-aa9d-c604-710c-bead3587948a" -> "OpaqueRef:4d293e68-1c2a-c6a8-bdd9-823abca394ba"; } ``` Signed-off-by: Andrii Sultanov --- python3/Makefile | 1 + python3/libexec/metricsgraph.py | 207 ++++++++++++++++++++++++++++++++ 2 files changed, 208 insertions(+) create mode 100644 python3/libexec/metricsgraph.py diff --git a/python3/Makefile b/python3/Makefile index 876b1b8ff49..1a46b50b164 
100644 --- a/python3/Makefile +++ b/python3/Makefile @@ -24,6 +24,7 @@ install: $(IDATA) dnf_plugins/ptoken.py $(DESTDIR)$(SITE3_DIR)/$(DNF_PLUGIN_DIR)/ $(IPROG) libexec/metrics.py $(DESTDIR)$(OPTDIR)/debug + $(IPROG) libexec/metricsgraph.py $(DESTDIR)$(OPTDIR)/debug $(IPROG) libexec/host-display $(DESTDIR)$(LIBEXECDIR) $(IPROG) libexec/link-vms-by-sr.py $(DESTDIR)$(LIBEXECDIR) $(IPROG) libexec/usb_reset.py $(DESTDIR)$(LIBEXECDIR) diff --git a/python3/libexec/metricsgraph.py b/python3/libexec/metricsgraph.py new file mode 100644 index 00000000000..4f983172b67 --- /dev/null +++ b/python3/libexec/metricsgraph.py @@ -0,0 +1,207 @@ +#!/usr/bin/python3 + +# program to run through all the interesting metrics (i.e. those that are related in some way to a resident VM) +# and output a graph suitable for graphviz. +# note that graphviz comments are /*C-style*/ +# the fundamental type here is a 'record dictionary', indexing object records by their opaque refs. + +import atexit +import contextlib +from pprint import pprint, pformat + +import XenAPI + + +# given a set of object references of type 'type_string', return a +# dictionary linking the references to the associated records +def get_records(sx, object_set, type_string): + dic = {} + for x in object_set: + dic[x] = sx.__getattr__(type_string).get_record(x) + return dic + + +# given a record dictionary, print out the 'name_labels' of each entry if it exists, and the reference otherwise +def print_names(dictionary, title): + print("/*") + print( + title, + ":", + ", ".join([dictionary[x].get("name_label", x) for x in dictionary.keys()]), + ) + print("*/") + + +# for example, the record dictionary of VMs contains a key VIFs, which is a list of the associated VIFs +# this function will take say, the dictionary of all the resident VMs, and return the set of all the VIFs +# associated with them +def set_from_key_in_dictionary_of_records(record_dict, key_name, key_is_list=True): + s = set() + for v in record_dict.values(): + 
key = v[key_name] + if key_is_list: + s.update(key) + else: + s.add(key) + return s + + +# and this composition function will, say, given the VM dictionary, and the key name VIFs, return the +# dictionary of all associated VIFs +def chase_key(sx, record_dictionary, type_name, key_name, key_is_list=True): + new_set = set_from_key_in_dictionary_of_records( + record_dictionary, key_name, key_is_list + ) + return get_records(sx, new_set, type_name) + + +# The metrics records hold a lot of data. The following functions take a reference/record pair of a particular type +# get the associated metrics, and return a few interesting quantities we want to see on the graphs +def host_data(sx, ref, record): + data = {} + data["name"] = record["name_label"] + metrics = sx.host_metrics.get_record(sx.host.get_metrics(ref)) + cpu_utilisations = [ + sx.host_cpu.get_utilisation(x) for x in sx.host.get_host_CPUs(ref) + ] + data["label"] = {} + data["label"]["cpus"] = " ".join(["%.2f" % x for x in cpu_utilisations]) + data["label"]["memory"] = "%.2f" % ( + float(metrics["memory_free"]) / float(metrics["memory_total"]) + ) + return data + + +def vm_data(sx, ref, record): + data = {} + data["name"] = record["name_label"] + metrics = sx.VM_metrics.get_record(sx.VM.get_metrics(ref)) + data["label"] = {} + data["label"]["cpus"] = " ".join( + ["%.2f" % x for x in metrics["VCPUs_utilisation"].values()] + ) + data["label"]["memory"] = "%sM" % ( + float(metrics["memory_actual"]) / float(1024 * 1024) + ) + return data + + +# vifs vbds and pifs all work the same way, but we need a type variable for the xapi bindings dispatcher +def io_data(sx, ref, record, type_name, data_owner, suffixes): + data = {} + data["name"] = type_name.lower() + obj_class = sx.__getattr__(type_name) + owner_class = sx.__getattr__(data_owner) + belongs_to = obj_class.__getattr__(f"get_{data_owner}")(ref) + device_number = obj_class.__getattr__("get_device")(ref) + metric_name = f"{type_name.lower()}_{device_number}" + 
metrics = [ + x + for x in owner_class.get_data_sources(belongs_to) + if x["name_label"].startswith(metric_name) + ] + metrics = {x["name_label"]: x["value"] for x in metrics} + + data["label"] = {} + data["label"]["read"] = "%.2fk" % (metrics[f"{metric_name}_{suffixes[0]}"]) + data["label"]["write"] = "%.2fk" % (metrics[f"{metric_name}_{suffixes[1]}"]) + return data + + +# these functions use the object lists constructed and metric analysis functions defined above +# to print out the node and edge definitions required by graphviz +def print_nodes(record_dictionary): + for x in record_dictionary.keys(): + print('"%s" [label="%s"];' % (x, record_dictionary[x].get("name_label", "?"))) + + +# calls the metric analysis functions to output nodes labelled with their metrics as well as their names +def print_metric_nodes(sx, dic, metricfn): + for ref, record in dic.items(): + d = metricfn(sx, ref, record) + label = ( + d["name"] + + "\\n" + + "\\n".join(["%s=%s" % (k, v) for k, v in d["label"].items()]) + ) + print('"%s" [label="%s"];' % (ref, label)) + + +# prints out the connecting edges, in similar manner to the key-chasing above when we first got the objects +def print_edges(record_dictionary, key, key_is_list=True): + for k, v in record_dictionary.items(): + if key_is_list: + for x in v[key]: + print('"%s" -> "%s";' % (k, x)) + else: + print('"%s" -> "%s";' % (k, v[key])) + + +def main(): + session = XenAPI.xapi_local() + + def logout(): + with contextlib.suppress(Exception): + session.xenapi.session.logout() + + atexit.register(logout) + + session.xenapi.login_with_password("", "", "1.0", "newmetricsgraph-script") + sx = session.xenapi + + # find all the hosts + host_dic = get_records(sx, set(sx.host.get_all()), "host") + + # chase the chain of types through hosts->VMs->VIFs->networks->PIFs and hosts->VMs->VBDs + resident_vms_dic = chase_key(sx, host_dic, "VM", "resident_VMs") + active_vifs_dic = chase_key(sx, resident_vms_dic, "VIF", "VIFs") + active_vbds_dic = 
chase_key(sx, resident_vms_dic, "VBD", "VBDs") + active_networks_dic = chase_key(sx, active_vifs_dic, "network", "network", False) + active_pifs_dic = chase_key(sx, active_networks_dic, "PIF", "PIFs") + + # print out the objects we found as a graphviz comment + print_names(host_dic, "hosts") + print_names(resident_vms_dic, "resident VMs") + print_names(active_vifs_dic, "active VIFs") + print_names(active_vbds_dic, "active VBDs") + print_names(active_networks_dic, "active networks") + print_names(active_pifs_dic, "active PIFs") + + # We've now got all the objects we need, so we now need to get their metrics data and output it as a graphviz file + + print('digraph active_objects {') + print('node [shape="rect"];') + + print_metric_nodes(sx, host_dic, host_data) + print_metric_nodes(sx, resident_vms_dic, vm_data) + print_metric_nodes( + sx, + active_vifs_dic, + lambda sx, ref, record: io_data(sx, ref, record, "VIF", "VM", ["rx", "tx"]), + ) + print_metric_nodes( + sx, + active_vbds_dic, + lambda sx, ref, record: io_data( + sx, ref, record, "VBD", "VM", ["read", "write"] + ), + ) + print_metric_nodes( + sx, + active_pifs_dic, + lambda sx, ref, record: io_data(sx, ref, record, "PIF", "host", ["rx", "tx"]), + ) + + print_nodes(active_networks_dic) + + print_edges(host_dic, "resident_VMs") + print_edges(resident_vms_dic, "VIFs") + print_edges(resident_vms_dic, "VBDs") + print_edges(active_vifs_dic, "network", False) + print_edges(active_networks_dic, "PIFs") + + print("}") + + +if __name__ == "__main__": + main() From 6ec664817774050a4d0ab1ed18371c33f0b99266 Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Wed, 12 Feb 2025 12:00:00 +0100 Subject: [PATCH 041/117] xenopsd docs: Fix headings in VM.build/xenguest.md - Remove the superflous heading for the initial mermaid flowchart - Improve the description above the initial mermaid flowchart - Fix some headings from '# heading text' to '## heading text' - Rephrase the list heading for the platform data in Xenstore 
Signed-off-by: Bernhard Kaindl --- .../xenopsd/walkthroughs/VM.build/xenguest.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/doc/content/xenopsd/walkthroughs/VM.build/xenguest.md b/doc/content/xenopsd/walkthroughs/VM.build/xenguest.md index 66345f018ec..70908d556fb 100644 --- a/doc/content/xenopsd/walkthroughs/VM.build/xenguest.md +++ b/doc/content/xenopsd/walkthroughs/VM.build/xenguest.md @@ -3,10 +3,9 @@ title: xenguest description: "Perform building VMs: Allocate and populate the domain's system memory." --- - -# Flowchart - -xenguest is called as part of starting a new domain in VM_build: +As part of starting a new domain in VM_build, `xenopsd` calls `xenguest`. +When multiple domain build threads run in parallel, +also multiple instances of `xenguest` also run in parallel: ```mermaid flowchart @@ -27,7 +26,8 @@ libxenguest2 --> Xen end ``` -# About xenguest +## About xenguest + `xenguest` is called by the xenopsd [Domain.build](Domain.build) function to perform the build phase for new VMs, which is part of the `xenopsd` [VM.start operation](VM.start). @@ -45,7 +45,7 @@ an individual package once planned changes to the Xen hypercalls are stabilised. Over time, `xenguest` has evolved to build more of the initial domain state. -# Interface to xenguest +## Interface to xenguest ```mermaid flowchart @@ -70,7 +70,8 @@ Xenstore[Xenstore platform data] --> xenguest - The amount of system memory of the domain, - A number of other parameters that are domain-specific. 
-Using the Xenstore, the platform data (vCPUs, vCPU affinity, etc) is passed: +`xenopsd` uses the Xenstore to provide platform data: + - the vCPU affinity - the vCPU credit2 weight/cap parameters - whether the NX bit is exposed From 3ea71e583c1bd0200f1baf8184543292b76516d5 Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Wed, 12 Feb 2025 12:00:00 +0100 Subject: [PATCH 042/117] xenopsd docs: Fix the link on VM.build/VM.build.md to Domain.build.md Signed-off-by: Bernhard Kaindl --- doc/content/xenopsd/walkthroughs/VM.build/VM_build.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/content/xenopsd/walkthroughs/VM.build/VM_build.md b/doc/content/xenopsd/walkthroughs/VM.build/VM_build.md index c488d9b7c1c..a9bc5eee812 100644 --- a/doc/content/xenopsd/walkthroughs/VM.build/VM_build.md +++ b/doc/content/xenopsd/walkthroughs/VM.build/VM_build.md @@ -45,12 +45,12 @@ must: 1. Run pygrub (or eliloader) to extract the kernel and initrd, if necessary 2. [Call](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L2222-L2225) - [Domain.build](Domain.build) - to: + [Domain.build](Domain.build) to - optionally run NUMA placement and - invoke [xenguest](VM.build/xenguest) to set up the domain memory. - See the walk-though on [VM.build](VM.build) for more details on this phase. + See the walk-through of the [Domain.build](Domain.build) function + for more details on this phase. 3. Apply the `cpuid` configuration 4. 
Store the current domain configuration on disk -- it's important to know the difference between the configuration you started with and the configuration From c81f0fbc45369fb30ebd0973e7f66b7555fa0d59 Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Wed, 12 Feb 2025 12:00:00 +0100 Subject: [PATCH 043/117] xenopsd docs: Fix the build_pre chapter in VM.build/Domain.build.md - Fix the link to the [NUMA feature description](/toolstack/features/NUMA) - Heading of build_pre: Shorter, less likely to wrap to new line - Rephrase for clarity, move non-essential remarks into parentheses - Avoid `` in [`text`](link): The `text` not visible to be a link Signed-off-by: Bernhard Kaindl --- .../walkthroughs/VM.build/Domain.build.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/doc/content/xenopsd/walkthroughs/VM.build/Domain.build.md b/doc/content/xenopsd/walkthroughs/VM.build/Domain.build.md index 8514e13eefd..fb9534a68e7 100644 --- a/doc/content/xenopsd/walkthroughs/VM.build/Domain.build.md +++ b/doc/content/xenopsd/walkthroughs/VM.build/Domain.build.md @@ -53,21 +53,22 @@ to call: - Run `xenguest` - `xenguest` to invoke the [xenguest](xenguest) program to setup the domain's system memory. -## Domain Build Preparation using build_pre +## build_pre: Prepare building the VM -[`Domain.build`](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L1111-L1210) +[Domain.build](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L1111-L1210) [calls](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L1137) -the [function `build_pre`](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L899-L964) -(which is also used for VM restore). It must: +[build_pre](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L899-L964) +(which is also used for VM restore) to: 1. 
[Call](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L902-L911) [wait_xen_free_mem](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L236-L272) - to wait, if necessary, for the Xen memory scrubber to catch up reclaiming memory (CA-39743) + to wait (if necessary), for the Xen memory scrubber to catch up reclaiming memory (CA-39743) 2. Call the hypercall to set the timer mode 3. Call the hypercall to set the number of vCPUs -4. As described in the [NUMA feature description](../../toolstack/features/NUMA), - when the `xe` configuration option `numa_placement` is set to `Best_effort`, - except when the VM has a hard affinity set, invoke the `numa_placement` function: +4. Call the `numa_placement` function + as described in the [NUMA feature description](/toolstack/features/NUMA) + when the `xe` configuration option `numa_placement` is set to `Best_effort` + (except when the VM has a hard CPU affinity). ```ml match !Xenops_server.numa_placement with From 4d8f0e56bfe43d52c40d4cc79adc3a6a6b35b1d2 Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Fri, 6 Dec 2024 11:13:23 +0000 Subject: [PATCH 044/117] CP-52744: Thread `TraceContext` as json inside debug_info Adds functionality to marshal and unmarshal `TraceContext` in the tracing library. Instead of passing only the `traceparent` in `debug_info`, the entire `TraceContext` is now passed as JSON. This change enables the transfer of baggage across xenopsd boundaries, improving tracing propagation and debugging capabilities. This should also enable later use of `baggage` as means of passing the thread classification between components. 
Signed-off-by: Gabriel Buica --- dune-project | 2 ++ ocaml/libs/tracing/dune | 4 ++- ocaml/libs/tracing/tracing.ml | 24 +++++++++++--- ocaml/libs/tracing/tracing.mli | 8 +++++ ocaml/xapi-idl/lib/debug_info.ml | 56 +++++++++++++++++++++++++++----- xapi-tracing.opam | 2 ++ 6 files changed, 82 insertions(+), 14 deletions(-) diff --git a/dune-project b/dune-project index 806b80b189b..f47e2b8ff83 100644 --- a/dune-project +++ b/dune-project @@ -103,11 +103,13 @@ dune (alcotest :with-test) (fmt :with-test) + ppx_deriving_yojson re uri (uuid :with-test) (xapi-log (= :version)) (xapi-stdext-threads (= :version)) + yojson ) (synopsis "Allows to instrument code to generate tracing information") (description "This library provides modules to allow gathering runtime traces.") diff --git a/ocaml/libs/tracing/dune b/ocaml/libs/tracing/dune index 71e5c7b7473..97c7e470e87 100644 --- a/ocaml/libs/tracing/dune +++ b/ocaml/libs/tracing/dune @@ -1,7 +1,9 @@ (library (name tracing) (modules tracing) - (libraries re uri xapi-log xapi-stdext-threads threads.posix) + (libraries re uri yojson xapi-log xapi-stdext-threads threads.posix) + (preprocess + (pps ppx_deriving_yojson)) (public_name xapi-tracing)) (library diff --git a/ocaml/libs/tracing/tracing.ml b/ocaml/libs/tracing/tracing.ml index 8beff835cec..5cbfd6b26e9 100644 --- a/ocaml/libs/tracing/tracing.ml +++ b/ocaml/libs/tracing/tracing.ml @@ -211,11 +211,12 @@ end (* The context of a trace that can be propagated across service boundaries. 
*) module TraceContext = struct - type traceparent = string + type traceparent = string [@@deriving yojson] - type baggage = (string * string) list + type baggage = (string * string) list [@@deriving yojson] type t = {traceparent: traceparent option; baggage: baggage option} + [@@deriving yojson] let empty = {traceparent= None; baggage= None} @@ -226,6 +227,10 @@ module TraceContext = struct let traceparent_of ctx = ctx.traceparent let baggage_of ctx = ctx.baggage + + let to_json_string t = Yojson.Safe.to_string (to_yojson t) + + let of_json_string s = of_yojson (Yojson.Safe.from_string s) end module SpanContext = struct @@ -297,6 +302,8 @@ module Span = struct let get_context t = t.context + let get_trace_context t = t.context |> SpanContext.context_of_span_context + let start ?(attributes = Attributes.empty) ?(trace_context : TraceContext.t option) ~name ~parent ~span_kind () = let trace_id, extra_context = @@ -312,9 +319,9 @@ module Span = struct in let context = (* If trace_context is provided to the call, override any inherited trace context. 
*) - Option.fold ~none:context - ~some:(Fun.flip SpanContext.with_trace_context context) - trace_context + trace_context + |> Option.fold ~none:context + ~some:(Fun.flip SpanContext.with_trace_context context) in (* Using gettimeofday over Mtime as it is better for sharing timestamps between the systems *) let begin_time = Unix.gettimeofday () in @@ -411,6 +418,13 @@ module Span = struct {span with status= {status_code; _description}} | _ -> span + + let to_propagation_context span = + let traceparent = span |> get_context |> SpanContext.to_traceparent in + span + |> get_context + |> SpanContext.context_of_span_context + |> TraceContext.with_traceparent (Some traceparent) end module TraceMap = Map.Make (Trace_id) diff --git a/ocaml/libs/tracing/tracing.mli b/ocaml/libs/tracing/tracing.mli index d20fda8c2e1..c4a12e65c4f 100644 --- a/ocaml/libs/tracing/tracing.mli +++ b/ocaml/libs/tracing/tracing.mli @@ -94,6 +94,10 @@ module TraceContext : sig val traceparent_of : t -> traceparent option val baggage_of : t -> baggage option + + val to_json_string : t -> string + + val of_json_string : string -> (t, string) result end module SpanContext : sig @@ -119,6 +123,8 @@ module Span : sig val get_context : t -> SpanContext.t + val get_trace_context : t -> TraceContext.t + val add_link : t -> SpanContext.t -> (string * string) list -> t val add_event : t -> string -> (string * string) list -> t @@ -140,6 +146,8 @@ module Span : sig val get_end_time : t -> float option val get_attributes : t -> (string * string) list + + val to_propagation_context : t -> TraceContext.t end module TraceMap : module type of Map.Make (Trace_id) diff --git a/ocaml/xapi-idl/lib/debug_info.ml b/ocaml/xapi-idl/lib/debug_info.ml index 599537ff5b1..5483d6bc451 100644 --- a/ocaml/xapi-idl/lib/debug_info.ml +++ b/ocaml/xapi-idl/lib/debug_info.ml @@ -22,10 +22,34 @@ let make ~log ~tracing = {log; tracing} let of_string s = let open Tracing in match String.split_on_char separator s with - | [log; 
traceparent] -> - let spancontext = SpanContext.of_traceparent traceparent in + | [log; trace_context] -> + (* Process the tracing data: + 1. We expect a JSON representing the trace_context. + 2. If the JSON is valid but not representing a trace_context, + we ignore the tracing data. + 3. If we get an exception from parsing the JSON string, + it means a traceparent string was received.*) + let trace_context = + try + let trace_context = + Tracing.TraceContext.of_json_string trace_context + in + match trace_context with + | Ok trace_context -> + Some trace_context + | Error _ -> + None + with _ -> + Some + (TraceContext.empty + |> TraceContext.with_traceparent (Some trace_context) + ) + in + let spancontext = + Option.(join (map Tracing.SpanContext.of_trace_context trace_context)) + in let tracing = - Option.map (fun tp -> Tracer.span_of_span_context tp log) spancontext + Option.map (Fun.flip Tracer.span_of_span_context log) spancontext in {log; tracing} | _ -> @@ -37,11 +61,13 @@ let filter_separator = Astring.String.filter (( <> ) separator) let to_string t = Option.fold ~none:t.log ~some:(fun span -> - let traceparent = - Tracing.Span.get_context span |> Tracing.SpanContext.to_traceparent + let trace_context = + span + |> Tracing.Span.to_propagation_context + |> Tracing.TraceContext.to_json_string in Printf.sprintf "%s%c%s" (filter_separator t.log) separator - (filter_separator traceparent) + (filter_separator trace_context) ) t.tracing @@ -68,7 +94,21 @@ let with_dbg ?(with_thread = false) ~module_name ~name ~dbg f = let traceparent_of_dbg dbg = match String.split_on_char separator dbg with - | [_; traceparent] -> - Some traceparent + | [_; trace_context] -> ( + (* Process the tracing data: + 1. We expect a JSON representing the trace_context. + 2. If the JSON is valid but not representing a trace_context, + we ignore the tracing data. + 3. 
If we get an exception from parsing the JSON string, + it means a traceparent string was received.*) + try + let trace_context = Tracing.TraceContext.of_json_string trace_context in + match trace_context with + | Ok trace_context -> + Tracing.TraceContext.traceparent_of trace_context + | Error _ -> + None + with _ -> Some trace_context + ) | _ -> None diff --git a/xapi-tracing.opam b/xapi-tracing.opam index b9cac8ba0dd..f5c0df48bfe 100644 --- a/xapi-tracing.opam +++ b/xapi-tracing.opam @@ -13,11 +13,13 @@ depends: [ "dune" {>= "3.15"} "alcotest" {with-test} "fmt" {with-test} + "ppx_deriving_yojson" "re" "uri" "uuid" {with-test} "xapi-log" {= version} "xapi-stdext-threads" {= version} + "yojson" "odoc" {with-doc} ] build: [ From c64c5b063f6ba9352af3926d63a8f86ef6c91abc Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Thu, 23 Jan 2025 08:53:03 +0000 Subject: [PATCH 045/117] CP-52744: Update `trace_context` when starting a context span Refresh the trace_context with the correct traceparent when creating a span with `start_tracing_helper` in `context.ml`. This ensures the tracing of the context has the correct parent. 
Signed-off-by: Gabriel Buica --- ocaml/libs/tracing/tracing.ml | 6 ++++++ ocaml/libs/tracing/tracing.mli | 2 ++ ocaml/xapi/context.ml | 6 +++++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/ocaml/libs/tracing/tracing.ml b/ocaml/libs/tracing/tracing.ml index 5cbfd6b26e9..3feaeef0c77 100644 --- a/ocaml/libs/tracing/tracing.ml +++ b/ocaml/libs/tracing/tracing.ml @@ -425,6 +425,12 @@ module Span = struct |> get_context |> SpanContext.context_of_span_context |> TraceContext.with_traceparent (Some traceparent) + + let with_trace_context span trace_context = + let span_context = + span |> get_context |> SpanContext.with_trace_context trace_context + in + {span with context= span_context} end module TraceMap = Map.Make (Trace_id) diff --git a/ocaml/libs/tracing/tracing.mli b/ocaml/libs/tracing/tracing.mli index c4a12e65c4f..50556740b35 100644 --- a/ocaml/libs/tracing/tracing.mli +++ b/ocaml/libs/tracing/tracing.mli @@ -148,6 +148,8 @@ module Span : sig val get_attributes : t -> (string * string) list val to_propagation_context : t -> TraceContext.t + + val with_trace_context : t -> TraceContext.t -> t end module TraceMap : module type of Map.Make (Trace_id) diff --git a/ocaml/xapi/context.ml b/ocaml/xapi/context.ml index 5f357e110af..b71ed4ca234 100644 --- a/ocaml/xapi/context.ml +++ b/ocaml/xapi/context.ml @@ -337,7 +337,11 @@ let start_tracing_helper ?(span_attributes = []) parent_fn task_name = ~parent () with | Ok x -> - x + Option.map + (fun span -> + span |> Span.to_propagation_context |> Span.with_trace_context span + ) + x | Error e -> R.warn "Failed to start tracing: %s" (Printexc.to_string e) ; None From 519a4a0107c298b1b0e291cf6e12ddcbd3fc1e36 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Wed, 12 Feb 2025 15:07:56 +0000 Subject: [PATCH 046/117] http-lib: test invalid Accept header Signed-off-by: Pau Ruiz Safont --- ocaml/libs/http-lib/http_test.ml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git 
a/ocaml/libs/http-lib/http_test.ml b/ocaml/libs/http-lib/http_test.ml index 45a8bab987f..ebb7c505662 100644 --- a/ocaml/libs/http-lib/http_test.ml +++ b/ocaml/libs/http-lib/http_test.ml @@ -24,6 +24,15 @@ module Accept = struct let actual = Accept.of_string data in Alcotest.(check @@ list accept) data expected actual + let test_invalid () = + let data = "text/html, image/gif, image/jpeg, ; q=.2, */; q=.2" in + let expected = Accept.Parse_failure " " in + let actual () = + let _ = Accept.of_string data in + () + in + Alcotest.check_raises "Raises Parse failure" expected actual + let test_accept_complex () = let data = "application/xml;q=0.9,text/html,application/xhtml+xml,*/*;q=0.8" @@ -81,6 +90,7 @@ module Accept = struct [ ("Simple", `Quick, test_accept_simple) ; ("Complex", `Quick, test_accept_complex) + ; ("Invalid", `Quick, test_invalid) ] ; preferred_tests ] From 7e64a2696bbc01e58fc3202c076be3583443518f Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 13 Feb 2025 11:05:20 +0000 Subject: [PATCH 047/117] CA-406403: Do not return HTTP 500 when Accept header can't be parsed /update_rrds returned a 500 HTTP code in some cases where the accept header was invalid. Now these cases are treated in the same way as a lack of Accept header. Signed-off-by: Pau Ruiz Safont --- ocaml/libs/http-lib/http.mli | 3 +++ ocaml/xcp-rrdd/bin/rrdd/rrdd_http_handler.ml | 27 ++++++++++---------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/ocaml/libs/http-lib/http.mli b/ocaml/libs/http-lib/http.mli index 114ddbc4f45..e0c972586c7 100644 --- a/ocaml/libs/http-lib/http.mli +++ b/ocaml/libs/http-lib/http.mli @@ -55,6 +55,9 @@ module Accept : sig val equal : t -> t -> bool val of_string : string -> t list + (** [of_string accept_hdr] Returns a list of weighted media types represented + by [accept_hdr]. If [accept_hdr] can't be parsed, raises [Parse_failure]. 
+ *) val to_string : t -> string diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_http_handler.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_http_handler.ml index 60f4c75dac0..0f0c3e5ffbc 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_http_handler.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_http_handler.ml @@ -18,20 +18,19 @@ let content_xml = content_hdr_of_mime mime_xml let client_prefers_json req = let module Accept = Http.Accept in - match req.Http.Request.accept with - | None -> - List.mem_assoc "json" req.Http.Request.query - | Some accept -> ( - let accepted = Accept.of_string accept in - let negotiated = Accept.preferred ~from:[mime_json; mime_xml] accepted in - match negotiated with - | x :: _ when String.equal x mime_json -> - true - | [] -> - List.mem_assoc "json" req.Http.Request.query - | _ -> - false - ) + let ( let* ) = Option.bind in + let map_head f lst = List.nth_opt lst 0 |> Option.map f in + let prefers_json = + let* accept = req.Http.Request.accept in + let* accepted = + try Some (Accept.of_string accept) with Accept.Parse_failure _ -> None + in + Accept.preferred ~from:[mime_json; mime_xml] accepted + |> map_head (fun x -> String.equal x mime_json) + in + Option.value + ~default:(List.mem_assoc "json" req.Http.Request.query) + prefers_json let content_type json = if json then content_json else content_xml From 2430ffec308036f0a92ed0a1e0a6650caadad9ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 13 Feb 2025 14:01:37 +0000 Subject: [PATCH 048/117] fix(CI) Coveralls is down for maintenance: do not fail the CI if coveralls is down MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow the official recommendation from Coveralls: > While our API is in maintenance mode, new coverage report uploads (POSTs to /api/v1/jobs) will fail with a 405 or other 4xx error. > To keep this from breaking your CI builds and holding up your PRs, allow coveralls steps to "fail on error." 
> If you are using one of our Official Integrations, add: > - `fail-on-error: false` if using Coveralls GitHub Action Signed-off-by: Edwin Török --- .github/workflows/other.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/other.yml b/.github/workflows/other.yml index 7cac6522c2c..602359d37ab 100644 --- a/.github/workflows/other.yml +++ b/.github/workflows/other.yml @@ -56,6 +56,7 @@ jobs: files: .git/coverage${{matrix.python-version}}.xml flag-name: python${{matrix.python-version}} parallel: true + fail-on-error: false - uses: dciborow/action-pylint@0.1.0 with: From 51e48a6a60947fe593727f21b9c477a059560f5d Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Thu, 13 Feb 2025 12:00:00 +0100 Subject: [PATCH 049/117] Python coverage: continue-on-error -> fail-on-error: false Signed-off-by: Bernhard Kaindl --- .github/workflows/other.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/other.yml b/.github/workflows/other.yml index 7cac6522c2c..9b1e6dbb2ac 100644 --- a/.github/workflows/other.yml +++ b/.github/workflows/other.yml @@ -89,8 +89,8 @@ jobs: - name: Finish the parallel coverage upload to Coveralls uses: coverallsapp/github-action@v2 with: + fail-on-error: false parallel-finished: true - continue-on-error: true # Do not fail CI if this step fails deprecation-test: name: Deprecation tests From e73476e928dff73c256a3a1ed4d2dd14b9dd1bc1 Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Wed, 12 Feb 2025 17:59:02 +0100 Subject: [PATCH 050/117] docs: Update the VM.build flowchart, include it where useful for context Signed-off-by: Bernhard Kaindl --- .../walkthroughs/VM.build/VM_build-chart.md | 27 +++++++++++++++++++ .../xenopsd/walkthroughs/VM.build/VM_build.md | 18 +++---------- .../xenopsd/walkthroughs/VM.build/_index.md | 18 +++---------- 3 files changed, 33 insertions(+), 30 deletions(-) create mode 100644 doc/content/xenopsd/walkthroughs/VM.build/VM_build-chart.md diff --git 
a/doc/content/xenopsd/walkthroughs/VM.build/VM_build-chart.md b/doc/content/xenopsd/walkthroughs/VM.build/VM_build-chart.md new file mode 100644 index 00000000000..eec1f05fc0e --- /dev/null +++ b/doc/content/xenopsd/walkthroughs/VM.build/VM_build-chart.md @@ -0,0 +1,27 @@ +--- +hidden: true +title: VM_build micro-op flowchart +description: For inclusion in _index.md and VM_build.md +weight: 10 +--- + +```mermaid +flowchart +subgraph xenopsd VM_build[xenopsd: VM_build micro#8209;op] +direction LR +VM_build --> VM.build +VM.build --> VM.build_domain +VM.build_domain --> VM.build_domain_exn +VM.build_domain_exn --> Domain.build +click VM_build " +https://github.com/xapi-project/xen-api/blob/83555067/ocaml/xenopsd/lib/xenops_server.ml#L2255-L2271" _blank +click VM.build " +https://github.com/xapi-project/xen-api/blob/83555067/ocaml/xenopsd/xc/xenops_server_xen.ml#L2290-L2291" _blank +click VM.build_domain " +https://github.com/xapi-project/xen-api/blob/83555067/ocaml/xenopsd/xc/xenops_server_xen.ml#L2250-L2288" _blank +click VM.build_domain_exn " +https://github.com/xapi-project/xen-api/blob/83555067/ocaml/xenopsd/xc/xenops_server_xen.ml#L2024-L2248" _blank +click Domain.build " +https://github.com/xapi-project/xen-api/blob/83555067/ocaml/xenopsd/xc/domain.ml#L1111-L1210" _blank +end +``` diff --git a/doc/content/xenopsd/walkthroughs/VM.build/VM_build.md b/doc/content/xenopsd/walkthroughs/VM.build/VM_build.md index c488d9b7c1c..073202ddd4e 100644 --- a/doc/content/xenopsd/walkthroughs/VM.build/VM_build.md +++ b/doc/content/xenopsd/walkthroughs/VM.build/VM_build.md @@ -3,6 +3,8 @@ title: VM_build micro-op linkTitle: VM_build μ-op description: Overview of the VM_build μ-op (runs after the VM_create μ-op created the domain). 
weight: 10 +mermaid: + force: true --- ## Overview @@ -23,21 +25,7 @@ which calls [VM.build_domain_exn](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L2024-L2248) which calls [Domain.build](Domain.build): -```mermaid -flowchart -subgraph xenopsd VM_build[xenopsd VM_build micro#8209;op] -direction LR -VM_build --> VM.build -VM.build --> VM.build_domain -VM.build_domain --> VM.build_domain_exn -VM.build_domain_exn --> Domain.build -click VM_build "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/lib/xenops_server.ml#L2255-L2271" _blank -click VM.build "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L2290-L2291" _blank -click VM.build_domain "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L2250-L2288" _blank -click VM.build_domain_exn "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L2024-L2248" _blank -click Domain.build "../Domain.build/index.html" -end -``` +{{% include "VM_build-chart.md" %}} The function [VM.build_domain_exn](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L2024) diff --git a/doc/content/xenopsd/walkthroughs/VM.build/_index.md b/doc/content/xenopsd/walkthroughs/VM.build/_index.md index 0a5d73d70cf..63770bf6bdc 100644 --- a/doc/content/xenopsd/walkthroughs/VM.build/_index.md +++ b/doc/content/xenopsd/walkthroughs/VM.build/_index.md @@ -2,23 +2,11 @@ title: Building a VM description: After VM_create, VM_build builds the core of the domain (vCPUs, memory) weight: 20 +mermaid: + force: true --- +{{% include "VM_build-chart.md" %}} Walk-through documents for the `VM_build` phase: -```mermaid -flowchart -subgraph xenopsd VM_build[xenopsd VM_build micro#8209;op] -direction LR -VM_build --> VM.build -VM.build --> VM.build_domain -VM.build_domain --> VM.build_domain_exn -VM.build_domain_exn --> Domain.build -click VM_build 
"https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/lib/xenops_server.ml#L2255-L2271" _blank -click VM.build "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L2290-L2291" _blank -click VM.build_domain "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L2250-L2288" _blank -click VM.build_domain_exn "https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/xenops_server_xen.ml#L2024-L2248" _blank -end -``` - {{% children description=true %}} From aca86bc74f571f5f9a75f81bd747278304681507 Mon Sep 17 00:00:00 2001 From: Colin James Date: Thu, 13 Feb 2025 12:39:36 +0000 Subject: [PATCH 051/117] Replace startswith and endswith with stdlib calls Drops `startswith` and `endswith` from `String` (within `xapi-stdext/xapi-stdext-std`). These functions were introduced in OCaml 4.13: ``` String.starts_with : prefix:string -> string -> bool ``` Usage sites are adapted to supply the relevant labelled argument in each case, e.g. ~prefix:"foo". 
Signed-off-by: Colin James --- ocaml/database/parse_db_conf.ml | 2 +- ocaml/idl/datamodel_utils.ml | 2 +- .../lib/xapi-stdext-std/xstringext.ml | 10 ---- .../lib/xapi-stdext-std/xstringext.mli | 6 --- .../lib/xapi-stdext-std/xstringext_test.ml | 48 +------------------ ocaml/mpathalert/mpathalert.ml | 2 +- ocaml/quicktest/qt.ml | 2 +- ocaml/rrd2csv/src/rrd2csv.ml | 4 +- ocaml/xapi/extauth_plugin_ADpbis.ml | 2 +- ocaml/xapi/fileserver.ml | 2 +- ocaml/xapi/gpg.ml | 4 +- ocaml/xapi/import.ml | 8 ++-- ocaml/xapi/nm.ml | 2 +- ocaml/xapi/workload_balancing.ml | 2 +- ocaml/xapi/xapi_network.ml | 2 +- ocaml/xapi/xapi_pif.ml | 2 +- ocaml/xapi/xapi_pool.ml | 6 +-- ocaml/xapi/xapi_pool_update.ml | 2 +- ocaml/xapi/xapi_secret.ml | 6 +-- ocaml/xapi/xapi_xenops.ml | 8 +++- ocaml/xe-cli/newcli.ml | 4 +- 21 files changed, 34 insertions(+), 92 deletions(-) diff --git a/ocaml/database/parse_db_conf.ml b/ocaml/database/parse_db_conf.ml index 852ace7d9f4..8eb55ee2afe 100644 --- a/ocaml/database/parse_db_conf.ml +++ b/ocaml/database/parse_db_conf.ml @@ -168,7 +168,7 @@ let parse_db_conf s = let connections : db_connection list ref = ref [] in while !lines <> [] do let line = List.hd !lines in - if String.startswith "[" line then + if String.starts_with ~prefix:"[" line then connections := read_block () :: !connections else consume_line () diff --git a/ocaml/idl/datamodel_utils.ml b/ocaml/idl/datamodel_utils.ml index 080d9059ab8..13c26531594 100644 --- a/ocaml/idl/datamodel_utils.ml +++ b/ocaml/idl/datamodel_utils.ml @@ -183,7 +183,7 @@ let find_self_parameter (msg : message) = ) let plural name = - if Xstringext.String.endswith "metrics" name then + if String.ends_with ~suffix:"metrics" name then name ^ " instances" else name ^ "s" diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-std/xstringext.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-std/xstringext.ml index 0b3da00c476..16f60dedbae 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-std/xstringext.ml +++ 
b/ocaml/libs/xapi-stdext/lib/xapi-stdext-std/xstringext.ml @@ -39,16 +39,6 @@ module String = struct done ; !accu - (** True if string 'x' ends with suffix 'suffix' *) - let endswith suffix x = - let x_l = String.length x and suffix_l = String.length suffix in - suffix_l <= x_l && String.sub x (x_l - suffix_l) suffix_l = suffix - - (** True if string 'x' starts with prefix 'prefix' *) - let startswith prefix x = - let x_l = String.length x and prefix_l = String.length prefix in - prefix_l <= x_l && String.sub x 0 prefix_l = prefix - (** Returns true for whitespace characters, false otherwise *) let isspace = function ' ' | '\n' | '\r' | '\t' -> true | _ -> false diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-std/xstringext.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-std/xstringext.mli index e2b486285a6..1f27490493d 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-std/xstringext.mli +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-std/xstringext.mli @@ -29,12 +29,6 @@ module String : sig val fold_right : (char -> 'a -> 'a) -> string -> 'a -> 'a (** Iterate over the characters in a string in reverse order. 
*) - val endswith : string -> string -> bool - (** True if string 'x' ends with suffix 'suffix' *) - - val startswith : string -> string -> bool - (** True if string 'x' starts with prefix 'prefix' *) - val isspace : char -> bool (** True if the character is whitespace *) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-std/xstringext_test.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-std/xstringext_test.ml index b0816e69ebb..145ce632bbc 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-std/xstringext_test.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-std/xstringext_test.ml @@ -127,44 +127,6 @@ let test_has_substr = in ("has_substr", List.map test spec) -let test_startswith = - let spec = - [ - ("", "", true) - ; ("", "foo bar", true) - ; ("foofo", "foof", false) - ; ("foof", "foof", true) - ; ("f", "foof", true) - ; ("fo", "foof", true) - ; ("of", "foof", false) - ; ("ff", "foof", false) - ] - in - let test (contained, container, expected) = - let name = Printf.sprintf {|"%s" starts with "%s"|} container contained in - test_boolean (XString.startswith contained) (name, container, expected) - in - ("startswith", List.map test spec) - -let test_endswith = - let spec = - [ - ("", "", true) - ; ("", "foo bar", true) - ; ("ofoof", "foof", false) - ; ("foof", "foof", true) - ; ("f", "foof", true) - ; ("fo", "foof", false) - ; ("of", "foof", true) - ; ("ff", "foof", false) - ] - in - let test (contained, container, expected) = - let name = Printf.sprintf {|"%s" ends with "%s"|} container contained in - test_boolean (XString.endswith contained) (name, container, expected) - in - ("endswith", List.map test spec) - let test_rtrim = let spec = [ @@ -187,12 +149,4 @@ let test_rtrim = let () = Alcotest.run "Xstringext" - [ - test_rev_map - ; test_split - ; test_split_f - ; test_has_substr - ; test_startswith - ; test_endswith - ; test_rtrim - ] + [test_rev_map; test_split; test_split_f; test_has_substr; test_rtrim] diff --git a/ocaml/mpathalert/mpathalert.ml 
b/ocaml/mpathalert/mpathalert.ml index 3a5d2556bd1..a5533c0fe54 100644 --- a/ocaml/mpathalert/mpathalert.ml +++ b/ocaml/mpathalert/mpathalert.ml @@ -95,7 +95,7 @@ let rec retry_with_session f rpc x = retry_with_session f rpc x let keep_mpath = - List.filter (fun (key, _) -> Xstringext.String.startswith "mpath-" key) + List.filter (fun (key, _) -> String.starts_with ~prefix:"mpath-" key) let create_alert ~host_uuid_string ~host_name ~pbd_uuid_string key value timestamp scsi_id = diff --git a/ocaml/quicktest/qt.ml b/ocaml/quicktest/qt.ml index 3b6b7cd9743..7045ae3c27c 100644 --- a/ocaml/quicktest/qt.ml +++ b/ocaml/quicktest/qt.ml @@ -122,7 +122,7 @@ module VM = struct match List.filter (fun self -> - Xapi_stdext_std.Xstringext.String.startswith startswith + String.starts_with ~prefix:startswith (Client.Client.VM.get_name_label ~rpc ~session_id ~self) && Client.Client.VM.get_is_a_template ~rpc ~session_id ~self ) diff --git a/ocaml/rrd2csv/src/rrd2csv.ml b/ocaml/rrd2csv/src/rrd2csv.ml index b22c430f656..9c19a6a321f 100644 --- a/ocaml/rrd2csv/src/rrd2csv.ml +++ b/ocaml/rrd2csv/src/rrd2csv.ml @@ -282,7 +282,7 @@ module Ds_selector = struct match the non-null fields of f *) let filter11 f d = true - && (Xstringext.String.startswith f.metric d.metric || f.metric = "") + && (String.starts_with ~prefix:f.metric d.metric || f.metric = "") && (f.cf = d.cf || f.cf = None) && ( match (f.owner, d.owner) with | None, _ -> @@ -601,7 +601,7 @@ let print_last session_id data_sources = let filter_ds_that_starts_with_name dss name = List.fold_left (fun acc ds -> - if Xstringext.String.startswith name ds.Ds_selector.metric then + if String.starts_with ~prefix:name ds.Ds_selector.metric then ds :: acc else acc diff --git a/ocaml/xapi/extauth_plugin_ADpbis.ml b/ocaml/xapi/extauth_plugin_ADpbis.ml index 0e9bd3e44f8..56f723ff6f3 100644 --- a/ocaml/xapi/extauth_plugin_ADpbis.ml +++ b/ocaml/xapi/extauth_plugin_ADpbis.ml @@ -363,7 +363,7 @@ module AuthADlw : Auth_signature.AUTH_MODULE = 
struct let errcode = List.hd (List.filter - (fun w -> String.startswith "LW_ERROR_" w) + (fun w -> String.starts_with ~prefix:"LW_ERROR_" w) (split_to_words errcodeline) ) in diff --git a/ocaml/xapi/fileserver.ml b/ocaml/xapi/fileserver.ml index 4931d419918..5780a87720c 100644 --- a/ocaml/xapi/fileserver.ml +++ b/ocaml/xapi/fileserver.ml @@ -78,7 +78,7 @@ let send_file (uri_base : string) (dir : string) (req : Request.t) let file_path = Xapi_stdext_unix.Unixext.resolve_dot_and_dotdot file_path in - if not (String.startswith dir file_path) then ( + if not (String.starts_with ~prefix:dir file_path) then ( debug "Rejecting request for file: %s (outside of directory %s)" file_path dir ; Http_svr.response_forbidden ~req s diff --git a/ocaml/xapi/gpg.ml b/ocaml/xapi/gpg.ml index a6c0ec7daaa..1dd5c8141c8 100644 --- a/ocaml/xapi/gpg.ml +++ b/ocaml/xapi/gpg.ml @@ -28,7 +28,7 @@ exception InvalidSignature let parse_gpg_status status_data = let lines = String.split '\n' status_data in let status_contains substr = - List.exists (fun s -> String.startswith substr s) lines + List.exists (fun s -> String.starts_with ~prefix:substr s) lines in if not @@ -40,7 +40,7 @@ let parse_gpg_status status_data = let validsig = "[GNUPG:] VALIDSIG" in if status_contains validsig then let validsigline = - List.find (fun s -> String.startswith validsig s) lines + List.find (fun s -> String.starts_with ~prefix:validsig s) lines in match String.split ' ' validsigline with | _ :: _ :: fingerprint :: _ -> diff --git a/ocaml/xapi/import.ml b/ocaml/xapi/import.ml index 6f2c29acca8..c3abaf34b00 100644 --- a/ocaml/xapi/import.ml +++ b/ocaml/xapi/import.ml @@ -189,7 +189,7 @@ let assert_can_restore_backup ~__context rpc session_id (x : header) = let get_vm_uuid_of_snap s = let snapshot_of = Ref.string_of s.API.vM_snapshot_of in try - if Xstringext.String.startswith "Ref:" snapshot_of then + if String.starts_with ~prefix:"Ref:" snapshot_of then (* This should be a snapshot in the archive *) let v = 
List.find @@ -198,7 +198,7 @@ let assert_can_restore_backup ~__context rpc session_id (x : header) = in let v = get_vm_record v.snapshot in Some v.API.vM_uuid - else if Xstringext.String.startswith Ref.ref_prefix snapshot_of then + else if String.starts_with ~prefix:Ref.ref_prefix snapshot_of then (* This should be a snapshot in a live system *) if Db.is_valid_ref __context s.API.vM_snapshot_of then Some (Db.VM.get_uuid ~__context ~self:s.API.vM_snapshot_of) @@ -2313,9 +2313,7 @@ let read_map_params name params = (* include ':' *) let filter_params = List.filter - (fun (p, _) -> - Xstringext.String.startswith name p && String.length p > len - ) + (fun (p, _) -> String.starts_with ~prefix:name p && String.length p > len) params in List.map diff --git a/ocaml/xapi/nm.ml b/ocaml/xapi/nm.ml index 1483106ace5..abb79c7be35 100644 --- a/ocaml/xapi/nm.ml +++ b/ocaml/xapi/nm.ml @@ -215,7 +215,7 @@ let create_bond ~__context bond mtu persistent = let overrides = List.filter_map (fun (k, v) -> - if String.startswith "bond-" k then + if String.starts_with ~prefix:"bond-" k then Some (String.sub_to_end k 5, v) else None diff --git a/ocaml/xapi/workload_balancing.ml b/ocaml/xapi/workload_balancing.ml index bdca5c4199a..27fa184da84 100644 --- a/ocaml/xapi/workload_balancing.ml +++ b/ocaml/xapi/workload_balancing.ml @@ -233,7 +233,7 @@ let wlb_request host meth body encoded_auth = let filtered_headers headers = List.map (fun s -> - if String.startswith "Authorization:" s then + if String.starts_with ~prefix:"Authorization:" s then "Authorization: Basic " else s diff --git a/ocaml/xapi/xapi_network.ml b/ocaml/xapi/xapi_network.ml index 37d527a2a34..73a59fd698c 100644 --- a/ocaml/xapi/xapi_network.ml +++ b/ocaml/xapi/xapi_network.ml @@ -252,7 +252,7 @@ let create ~__context ~name_label ~name_description ~mTU ~other_config ~bridge (not is_internal_session) && (String.length bridge > 15 || List.exists - (fun s -> String.startswith s bridge) + (fun s -> String.starts_with ~prefix:s 
bridge) bridge_blacklist ) then diff --git a/ocaml/xapi/xapi_pif.ml b/ocaml/xapi/xapi_pif.ml index 0284a134a68..1e82189f293 100644 --- a/ocaml/xapi/xapi_pif.ml +++ b/ocaml/xapi/xapi_pif.ml @@ -132,7 +132,7 @@ let refresh_all ~__context ~host = List.iter (fun self -> refresh_internal ~__context ~self) pifs let bridge_naming_convention (device : string) = - if String.startswith "eth" device then + if String.starts_with ~prefix:"eth" device then "xenbr" ^ String.sub device 3 (String.length device - 3) else "br" ^ device diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 97e0617bff1..1ab4b96986a 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -2839,8 +2839,8 @@ let enable_external_auth ~__context ~pool:_ ~config ~service_name ~auth_type = (Api_errors.auth_unknown_type, [msg_of_e]) ) | err_of_e - when Xstringext.String.startswith - Api_errors.auth_enable_failed err_of_e -> + when String.starts_with ~prefix:Api_errors.auth_enable_failed + err_of_e -> raise (Api_errors.Server_error ( Api_errors.pool_auth_prefix ^ err_of_e @@ -2923,7 +2923,7 @@ let disable_external_auth ~__context ~pool:_ ~config = debug "Failed to disable the external authentication of at least one \ host in the pool" ; - if Xstringext.String.startswith Api_errors.auth_disable_failed err + if String.starts_with ~prefix:Api_errors.auth_disable_failed err then (* tagged exception *) raise (Api_errors.Server_error diff --git a/ocaml/xapi/xapi_pool_update.ml b/ocaml/xapi/xapi_pool_update.ml index c7f3b4ebdfb..2307eca8589 100644 --- a/ocaml/xapi/xapi_pool_update.ml +++ b/ocaml/xapi/xapi_pool_update.ml @@ -817,7 +817,7 @@ let pool_update_download_handler (req : Request.t) s _ = if host_uuid <> localhost_uuid then proxy_request req s host_uuid else if - (not (String.startswith !Xapi_globs.host_update_dir filepath)) + (not (String.starts_with ~prefix:!Xapi_globs.host_update_dir filepath)) || not (Sys.file_exists filepath) then ( debug diff --git a/ocaml/xapi/xapi_secret.ml 
b/ocaml/xapi/xapi_secret.ml index af220124b12..57492840ead 100644 --- a/ocaml/xapi/xapi_secret.ml +++ b/ocaml/xapi/xapi_secret.ml @@ -41,7 +41,7 @@ let clean_out_passwds ~__context strmap = Db.Secret.destroy ~__context ~self:s with _ -> () in - let check_key (k, _) = String.endswith "password_secret" k in + let check_key (k, _) = String.ends_with ~suffix:"password_secret" k in let secrets = List.map snd (List.filter check_key strmap) in List.iter delete_secret secrets @@ -55,7 +55,7 @@ let copy ~__context ~secret = (* Modify a ((string * string) list) by duplicating all the passwords found in * it *) let duplicate_passwds ~__context strmap = - let check_key k = String.endswith "password_secret" k in + let check_key k = String.ends_with ~suffix:"password_secret" k in let possibly_duplicate (k, v) = if check_key k then let sr = Db.Secret.get_by_uuid ~__context ~uuid:v in @@ -70,7 +70,7 @@ let duplicate_passwds ~__context strmap = let move_passwds_to_secrets ~__context strmap = let maybe_move (k, value) = - if String.endswith "password" k then ( + if String.ends_with ~suffix:"password" k then ( let new_k = k ^ "_secret" in warn "Replacing deprecated %s with %s, please avoid using %s in \ diff --git a/ocaml/xapi/xapi_xenops.ml b/ocaml/xapi/xapi_xenops.ml index 1d17bc5b768..852852cdc2d 100644 --- a/ocaml/xapi/xapi_xenops.ml +++ b/ocaml/xapi/xapi_xenops.ml @@ -1643,7 +1643,11 @@ module Xenopsd_metadata = struct (List.assoc Xapi_globs.persist_xenopsd_md oc) |> Xapi_stdext_unix.Unixext.resolve_dot_and_dotdot in - if not (String.startswith Xapi_globs.persist_xenopsd_md_root file_path) + if + not + (String.starts_with ~prefix:Xapi_globs.persist_xenopsd_md_root + file_path + ) then warn "Not persisting xenopsd metadata to bad location: '%s'" file_path else ( @@ -1893,7 +1897,7 @@ let update_vm ~__context id = let results = List.filter_map (fun (path, _) -> - if String.startswith dir path then + if String.starts_with ~prefix:dir path then let rest = String.sub path 
(String.length dir) (String.length path - String.length dir) diff --git a/ocaml/xe-cli/newcli.ml b/ocaml/xe-cli/newcli.ml index 1b729cf6523..c33e32a2e0a 100644 --- a/ocaml/xe-cli/newcli.ml +++ b/ocaml/xe-cli/newcli.ml @@ -118,7 +118,9 @@ let rec read_rest_of_headers ic = debug "read '%s'\n" r ; let hdr = List.find - (fun s -> String.startswith (s ^ ": ") (String.lowercase_ascii r)) + (fun s -> + String.starts_with ~prefix:(s ^ ": ") (String.lowercase_ascii r) + ) hdrs in let value = end_of_string r (String.length hdr + 2) in From 5abc2faefca599bf98c920e94e1a50d8677b3a1c Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Wed, 12 Feb 2025 12:00:00 +0100 Subject: [PATCH 052/117] Domain.build docs: Improve notes on node_affinity, move to new page Improve the remarks on node_affinity and move them into a dedicated walk-though. Also add a diagram for xc_domain_node_setaffinity(). Signed-off-by: Bernhard Kaindl --- doc/content/lib/_index.md | 5 ++ doc/content/lib/xenctrl/_index.md | 5 ++ .../lib/xenctrl/xc_domain_node_setaffinity.md | 88 +++++++++++++++++++ .../walkthroughs/VM.build/Domain.build.md | 29 +++--- 4 files changed, 110 insertions(+), 17 deletions(-) create mode 100644 doc/content/lib/_index.md create mode 100644 doc/content/lib/xenctrl/_index.md create mode 100644 doc/content/lib/xenctrl/xc_domain_node_setaffinity.md diff --git a/doc/content/lib/_index.md b/doc/content/lib/_index.md new file mode 100644 index 00000000000..a0592427b0b --- /dev/null +++ b/doc/content/lib/_index.md @@ -0,0 +1,5 @@ +--- +title: Libraries +hidden: true +--- +{{% children description=true %}} \ No newline at end of file diff --git a/doc/content/lib/xenctrl/_index.md b/doc/content/lib/xenctrl/_index.md new file mode 100644 index 00000000000..d38c927b83f --- /dev/null +++ b/doc/content/lib/xenctrl/_index.md @@ -0,0 +1,5 @@ +--- +title: libxenctrl +description: Xen Control library for controlling the Xen hypervisor +--- +{{% children description=true %}} \ No newline at end of file 
diff --git a/doc/content/lib/xenctrl/xc_domain_node_setaffinity.md b/doc/content/lib/xenctrl/xc_domain_node_setaffinity.md new file mode 100644 index 00000000000..82cf2e36f08 --- /dev/null +++ b/doc/content/lib/xenctrl/xc_domain_node_setaffinity.md @@ -0,0 +1,88 @@ +--- +title: xc_domain_node_setaffinity() +description: Set a Xen domain's NUMA node affinity +--- + +`xc_domain_node_setaffinity()` controls the NUMA node affinity of a domain. + +By default, Xen enables the `auto_node_affinity` feature flag, +where setting the vCPU affinity also sets the NUMA node affinity for +memory allocations to be aligned with the vCPU affinity of the domain. + +Setting the NUMA node affinity using this call can be used, +for example, when there might not be enough memory on the +preferred NUMA node, but there are other NUMA nodes that have +enough free memory to be used for the system memory of the domain. + +In terms of future NUMA design, it might be even more favourable to +have a strategy in `xenguest` where in such cases, the superpages +of the preferred node are used first and a fallback to neighbouring +NUMA nodes only happens to the extent necessary. + +Likely, the future allocation strategy should be passed to `xenguest` +using Xenstore like the other platform parameters for the VM. 
+ +## Walk-through of xc_domain_node_setaffinity() + +```mermaid +classDiagram +class `xc_domain_node_setaffinity()` { + +xch: xc_interface #42; + +domid: uint32_t + +nodemap: xc_nodemap_t + 0(on success) + -EINVAL(if a node in the nodemask is not online) +} +click `xc_domain_node_setaffinity()` href " +https://github.com/xen-project/xen/blob/master/tools/libs/ctrl/xc_domain.c#L122-L158" + +`xc_domain_node_setaffinity()` --> `Xen hypercall: do_domctl()` +`xc_domain_node_setaffinity()` <-- `Xen hypercall: do_domctl()` +class `Xen hypercall: do_domctl()` { + Calls domain_set_node_affinity#40;#41; and returns its return value + Passes: domain (struct domain *, looked up using the domid) + Passes: new_affinity (modemask, converted from xc_nodemap_t) +} +click `Xen hypercall: do_domctl()` href " +https://github.com/xen-project/xen/blob/master/xen/common/domctl.c#L516-L525" + +`Xen hypercall: do_domctl()` --> `domain_set_node_affinity()` +`Xen hypercall: do_domctl()` <-- `domain_set_node_affinity()` +class `domain_set_node_affinity()` { + domain: struct domain + new_affinity: nodemask + 0(on success, the domain's node_affinity is updated) + -EINVAL(if a node in the nodemask is not online) +} +click `domain_set_node_affinity()` href " +https://github.com/xen-project/xen/blob/master/xen/common/domain.c#L943-L970" +``` + +### domain_set_node_affinity() + +This function implements the functionality of `xc_domain_node_setaffinity` +to set the NUMA affinity of a domain as described above. +If the new_affinity does not intersect the `node_online_map`, +it returns `-EINVAL`, otherwise on success `0`. + +When the `new_affinity` is a specific set of NUMA nodes, it updates the NUMA +`node_affinity` of the domain to these nodes and disables `auto_node_affinity` +for this domain. It also notifies the Xen scheduler of the change. + +This sets the preference the memory allocator to the new NUMA nodes, +and in theory, it could also alter the behaviour of the scheduler. 
+This of course depends on the scheduler and its configuration. + +## Notes on future design improvements + +This call cannot influence the past: The `xenopsd` +[VM_create](../../xenopsd/walkthroughs/VM.start.md#2-create-a-xen-domain) +micro-ops calls `Xenctrl.domain_create`. It currently creates +the domain's data structures before `numa_placement` was done. + +Improving `Xenctrl.domain_create` to pass a NUMA node +for allocating the Hypervisor's data structures (e.g. vCPU) +of the domain would require changes +to the Xen hypervisor and the `xenopsd` +[xenopsd VM_create](../../xenopsd/walkthroughs/VM.start.md#2-create-a-xen-domain) +micro-op. diff --git a/doc/content/xenopsd/walkthroughs/VM.build/Domain.build.md b/doc/content/xenopsd/walkthroughs/VM.build/Domain.build.md index fb9534a68e7..7d1152578b5 100644 --- a/doc/content/xenopsd/walkthroughs/VM.build/Domain.build.md +++ b/doc/content/xenopsd/walkthroughs/VM.build/Domain.build.md @@ -111,23 +111,6 @@ setting the vCPU affinity causes the Xen hypervisor to activate NUMA node affinity for memory allocations to be aligned with the vCPU affinity of the domain. -Note: See the Xen domain's -[auto_node_affinity](https://wiki.xenproject.org/wiki/NUMA_node_affinity_in_the_Xen_hypervisor) -feature flag, which controls this, which can be overridden in the -Xen hypervisor if needed for specific VMs. - -This can be used, for example, when there might not be enough memory -on the preferred NUMA node, but there are other NUMA nodes that have -enough free memory among with the memory allocations shall be done. - -In terms of future NUMA design, it might be even more favourable to -have a strategy in `xenguest` where in such cases, the superpages -of the preferred node are used first and a fallback to neighbouring -NUMA nodes only happens to the extent necessary. - -Likely, the future allocation strategy should be passed to `xenguest` -using Xenstore like the other platform parameters for the VM. 
- Summary: This passes the information to the hypervisor that memory allocation for this domain should preferably be done from this NUMA node. @@ -136,3 +119,15 @@ allocation for this domain should preferably be done from this NUMA node. With the preparation in `build_pre` completed, `Domain.build` [calls](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L1127-L1155) the `xenguest` function to invoke the [xenguest](xenguest) program to build the domain. + +## Notes on future design improvements + +The Xen domain feature flag +[domain->auto_node_affinity](https://wiki.xenproject.org/wiki/NUMA_node_affinity_in_the_Xen_hypervisor) +can be disabled by calling +[xc_domain_node_setaffinity()](../../references/xc_domain_node_setaffinity.md) +to set a specific NUMA node affinity in special cases: + +This can be used, for example, when there might not be enough memory on the preferred +NUMA node, and there are other NUMA nodes (in the same CPU package) to use +([reference](../../../lib/xenctrl/xc_domain_node_setaffinity.md)). 
From 1a76c8a94829abd7e1613452ec75b00f58a501ad Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Thu, 13 Feb 2025 12:00:00 +0100 Subject: [PATCH 053/117] (CI) Use the default setup-python action again (2.7 is obsoleted) Signed-off-by: Bernhard Kaindl --- .github/workflows/other.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/other.yml b/.github/workflows/other.yml index 2af01c69969..9c0d23db5ee 100644 --- a/.github/workflows/other.yml +++ b/.github/workflows/other.yml @@ -29,7 +29,7 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 0 # To check which files changed: origin/master..HEAD - - uses: LizardByte/setup-python-action@master + - uses: actions/setup-python@v5 with: python-version: ${{matrix.python-version}} From 498a910b1f855ffecca0353f0d2e6a801facf057 Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Thu, 13 Feb 2025 12:00:00 +0100 Subject: [PATCH 054/117] (doc) xenopsd/walkthroughs/VM.migrate.md: Fix typos and spelling Signed-off-by: Bernhard Kaindl --- .../xenopsd/walkthroughs/VM.migrate.md | 28 +++++++++---------- doc/content/xenopsd/walkthroughs/VM.start.md | 4 +-- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/doc/content/xenopsd/walkthroughs/VM.migrate.md b/doc/content/xenopsd/walkthroughs/VM.migrate.md index e3517ab3f0f..572bed526cb 100644 --- a/doc/content/xenopsd/walkthroughs/VM.migrate.md +++ b/doc/content/xenopsd/walkthroughs/VM.migrate.md @@ -28,8 +28,8 @@ that we will describe in the documentation are: - PCI.plug - VM.set_domain_action_request -The command have serveral parameters such as: should it be ran asynchronously, -should it be forwared to another host, how arguments should be marshalled and +The command has several parameters such as: Should it be started asynchronously, +should it be forwarded to another host, how arguments should be marshalled and so on. 
A new thread is created by [xapi/server_helpers.ml](https://github.com/xapi-project/xen-api/blob/7ac88b90e762065c5ebb94a8ea61c61bdbf62c5c/ocaml/xapi/server_helpers.ml#L55) to handle the command asynchronously. At this point the helper also check if the command should be passed to the [message forwarding](https://github.com/xapi-project/xen-api/blob/master/ocaml/xapi/message_forwarding.ml) @@ -76,26 +76,26 @@ After checking with Qemu that the VM is suspendable we can start the migration. As for *hooks*, commands to source domain are sent using [stunnel](https://github.com/xapi-project/xen-api/tree/master/ocaml/libs/stunnel) a daemon which is used as a wrapper to manage SSL encryption communication between two hosts on the same -pool. To import metada an XML RPC command is sent to the original domain. +pool. To import the metadata, an XML RPC command is sent to the original domain. -Once imported it will give us a reference id and will allow to build the new domain +Once imported, it will give us a reference id and will allow building the new domain on the destination using the temporary VM uuid `XXXXXXXX-XXXX-XXXX-XXXX-000000000001` where `XXX...` is the reference id of the original VM. ## Setting memory -One of the first thing to do is to setup the memory. The backend will check that there +One of the first thing to do is to set up the memory. The backend will check that there is no ballooning operation in progress. At this point the migration can fail if a ballooning operation is in progress and takes too much time. -Once memory checked the daemon will get the state of the VM (running, halted, ...) and -information about the VM are retrieve by the backend like the maximum memory the domain +Once memory has been checked, the daemon will get the state of the VM (running, halted, ...) and +information about the VM is retrieved by the backend like the maximum memory the domain can consume but also information about quotas for example. 
-Information are retrieve by the backend from xenstore. +The backend retrieves this information from the Xenstore. Once this is complete, we can restore VIF and create the domain. -The synchronisation of the memory is the first point of synchronisation and everythin +The synchronisation of the memory is the first point of synchronisation and everything is ready for VM migration. ## VM Migration @@ -148,12 +148,12 @@ We are almost done. The next step is to create the device model #### create device model Create device model is done by using the atomic operation [VM.create_device_model](https://github.com/xapi-project/xen-api/blob/7ac88b90e762065c5ebb94a8ea61c61bdbf62c5c/ocaml/xenopsd/xc/xenops_server_xen.ml#L2375). This -will configure **qemu-dm** and started. This allow to manage PCI devices. +will configure **qemu-dm** and started. This allows to manage PCI devices. #### PCI plug [PCI.plug](https://github.com/xapi-project/xen-api/blob/7ac88b90e762065c5ebb94a8ea61c61bdbf62c5c/ocaml/xenopsd/xc/xenops_server_xen.ml#L3399) -is executed by the backend. It plugs a PCI device and advertise it to QEMU if this option is set. It is +is executed by the backend. It plugs a PCI device and advertises it to QEMU if this option is set. It is the case for NVIDIA SR-IOV vGPUS. At this point devices have been restored. The new domain is considered survivable. We can @@ -168,12 +168,12 @@ initiated. Previously we spoke about some points called *hooks* at which `xenopsd` can execute some script. There is also a hook to run a post migrate script. After the execution of the script if there is one -the migration is almost done. The last step is a handskake to seal the success of the migration -and the old VM can now be cleaned. +the migration is almost done. The last step is a handshake to seal the success of the migration +and the old VM can now be cleaned up. 
# Links -Some links are old but even if many changes occured they are relevant for a global understanding +Some links are old but even if many changes occurred, they are relevant for a global understanding of the XAPI toolstack. - [XAPI architecture](https://xapi-project.github.io/xapi/architecture.html) diff --git a/doc/content/xenopsd/walkthroughs/VM.start.md b/doc/content/xenopsd/walkthroughs/VM.start.md index 6a12e9d9c60..eb16571d35b 100644 --- a/doc/content/xenopsd/walkthroughs/VM.start.md +++ b/doc/content/xenopsd/walkthroughs/VM.start.md @@ -33,7 +33,7 @@ users: - the XenAPI has many clients which are updated on long release cycles. The main property needed is backwards compatibility, so that new release of xapi - remain compatible with these older clients. Quite often we will chose to + remain compatible with these older clients. Quite often, we will choose to "grandfather in" some poorly designed interface simply because we wish to avoid imposing churn on 3rd parties. - the Xenopsd API clients are all open-source and are part of the xapi-project. @@ -166,7 +166,7 @@ via the function It is the responsibility of the client to call [TASK.destroy](https://github.com/xapi-project/xcp-idl/blob/2e5c3dd79c63e3711227892271a6bece98eb0fa1/xen/xenops_interface.ml#L406) -when the Task is nolonger needed. Xenopsd won't destroy the task because it contains +when the Task is no longer needed. Xenopsd won't destroy the task because it contains the success/failure result of the operation which is needed by the client. What happens when a Xenopsd receives a VM.start request? From 30308b6036f0749b08d21ffa4c2d8519ffa8045c Mon Sep 17 00:00:00 2001 From: Colin James Date: Fri, 14 Feb 2025 11:21:52 +0000 Subject: [PATCH 055/117] Use records when accumulating events In Xapi_event, events are accumulated by folding over the set of tables associated with a subscriber's subscription record. These events are mostly accumulated as lists within a tuple. 
There is no analogue of functional record update for tuples in OCaml. This means that the separate accumulations have to cite values they will not update. By introducing records, we can only cite the fields we actually change. Signed-off-by: Colin James --- ocaml/xapi/xapi_event.ml | 126 ++++++++++++++++++++------------------- 1 file changed, 64 insertions(+), 62 deletions(-) diff --git a/ocaml/xapi/xapi_event.ml b/ocaml/xapi/xapi_event.ml index a7412790019..8d5f0a674ef 100644 --- a/ocaml/xapi/xapi_event.ml +++ b/ocaml/xapi/xapi_event.ml @@ -525,6 +525,57 @@ let rec next ~__context = else rpc_of_events relevant +type entry = string * string * Xapi_database.Db_cache_types.Time.t + +type acc = { + creates: entry list + ; mods: entry list + ; deletes: entry list + ; last: Xapi_database.Db_cache_types.Time.t +} + +let collect_events subs tables last_generation acc table = + let open Xapi_database in + let open Db_cache_types in + let table_value = TableSet.find table tables in + let prepend_recent obj stat _ ({creates; mods; last; _} as entries) = + let Stat.{created; modified; deleted} = stat in + if Subscription.object_matches subs table obj then + let last = max last (max modified deleted) in + let creates = + if created > !last_generation then + (table, obj, created) :: creates + else + creates + in + let mods = + if modified > !last_generation && not (created > !last_generation) then + (table, obj, modified) :: mods + else + mods + in + {entries with creates; mods; last} + else + entries + in + let prepend_deleted obj stat ({deletes; last; _} as entries) = + let Stat.{created; modified; deleted} = stat in + if Subscription.object_matches subs table obj then + let last = max last (max modified deleted) in + let deletes = + if created <= !last_generation then + (table, obj, deleted) :: deletes + else + deletes + in + {entries with deletes; last} + else + entries + in + acc + |> Table.fold_over_recent !last_generation prepend_recent table_value + |> 
Table.fold_over_deleted !last_generation prepend_deleted table_value + let from_inner __context session subs from from_t timer batching = let open Xapi_database in let open From in @@ -551,75 +602,25 @@ let from_inner __context session subs from from_t timer batching = else (0L, []) in - ( msg_gen - , messages - , tableset - , List.fold_left - (fun acc table -> - (* Fold over the live objects *) - let acc = - Db_cache_types.Table.fold_over_recent !last_generation - (fun objref {Db_cache_types.Stat.created; modified; deleted} _ - (creates, mods, deletes, last) -> - if Subscription.object_matches subs table objref then - let last = max last (max modified deleted) in - (* mtime guaranteed to always be larger than ctime *) - ( ( if created > !last_generation then - (table, objref, created) :: creates - else - creates - ) - , ( if - modified > !last_generation - && not (created > !last_generation) - then - (table, objref, modified) :: mods - else - mods - ) - , (* Only have a mod event if we don't have a created event *) - deletes - , last - ) - else - (creates, mods, deletes, last) - ) - (Db_cache_types.TableSet.find table tableset) - acc - in - (* Fold over the deleted objects *) - Db_cache_types.Table.fold_over_deleted !last_generation - (fun objref {Db_cache_types.Stat.created; modified; deleted} - (creates, mods, deletes, last) -> - if Subscription.object_matches subs table objref then - let last = max last (max modified deleted) in - (* mtime guaranteed to always be larger than ctime *) - if created > !last_generation then - (creates, mods, deletes, last) - (* It was created and destroyed since the last update *) - else - (creates, mods, (table, objref, deleted) :: deletes, last) - (* It might have been modified, but we can't tell now *) - else - (creates, mods, deletes, last) - ) - (Db_cache_types.TableSet.find table tableset) - acc - ) - ([], [], [], !last_generation) - tables - ) + let events = + let initial = + {creates= []; mods= []; deletes= []; last= 
!last_generation} + in + let folder = collect_events subs tableset last_generation in + List.fold_left folder initial tables + in + (msg_gen, messages, tableset, events) in (* Each event.from should have an independent subscription record *) - let msg_gen, messages, tableset, (creates, mods, deletes, last) = + let msg_gen, messages, tableset, events = with_call session subs (fun sub -> let grab_nonempty_range = Throttle.Batching.with_recursive_loop batching @@ fun self arg -> - let ( (msg_gen, messages, _tableset, (creates, mods, deletes, last)) - as result - ) = + let result = Db_lock.with_lock (fun () -> grab_range (Db_backend.make ())) in + let msg_gen, messages, _tables, events = result in + let {creates; mods; deletes; last} = events in if creates = [] && mods = [] @@ -640,6 +641,7 @@ let from_inner __context session subs from from_t timer batching = grab_nonempty_range () ) in + let {creates; mods; deletes; last} = events in last_generation := last ; let event_of op ?snapshot (table, objref, time) = { From 196f4e9d7f075aad43140b56fd15a713014feafa Mon Sep 17 00:00:00 2001 From: Colin James Date: Fri, 14 Feb 2025 12:15:50 +0000 Subject: [PATCH 056/117] Remove mutable last_generation from Xapi_event In event accumulation for event.from, the code uses a mutable variable to thread a value through event accumulation. However, this value itself is accumulated in the fold: it gets larger for each matching database event that matches a subscription. To avoid the complexity in effectively having a global, mutable variable, we drop it and make it more evident when it changes: it is changed when no events are accumulated (by grab_range). In the case that no events are accumulated, but the deadline hasn't been reached, the code tries to collect events again. It is during a retry that the last_generation needs to be bumped, as it defines the starting point by which to query the database for recent and deleted objects. 
In short, if no suitable events were gleaned from matching database object records since a given point, there's no point starting from there again. Signed-off-by: Colin James --- ocaml/xapi/xapi_event.ml | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/ocaml/xapi/xapi_event.ml b/ocaml/xapi/xapi_event.ml index 8d5f0a674ef..233abcd1eb9 100644 --- a/ocaml/xapi/xapi_event.ml +++ b/ocaml/xapi/xapi_event.ml @@ -543,13 +543,13 @@ let collect_events subs tables last_generation acc table = if Subscription.object_matches subs table obj then let last = max last (max modified deleted) in let creates = - if created > !last_generation then + if created > last_generation then (table, obj, created) :: creates else creates in let mods = - if modified > !last_generation && not (created > !last_generation) then + if modified > last_generation && not (created > last_generation) then (table, obj, modified) :: mods else mods @@ -563,7 +563,7 @@ let collect_events subs tables last_generation acc table = if Subscription.object_matches subs table obj then let last = max last (max modified deleted) in let deletes = - if created <= !last_generation then + if created <= last_generation then (table, obj, deleted) :: deletes else deletes @@ -573,8 +573,8 @@ let collect_events subs tables last_generation acc table = entries in acc - |> Table.fold_over_recent !last_generation prepend_recent table_value - |> Table.fold_over_deleted !last_generation prepend_deleted table_value + |> Table.fold_over_recent last_generation prepend_recent table_value + |> Table.fold_over_deleted last_generation prepend_deleted table_value let from_inner __context session subs from from_t timer batching = let open Xapi_database in @@ -592,9 +592,8 @@ let from_inner __context session subs from from_t timer batching = in List.filter (fun table -> Subscription.table_matches subs table) all in - let last_generation = ref from in let last_msg_gen = ref from_t in - let 
grab_range t = + let grab_range ~since t = let tableset = Db_cache_types.Database.tableset (Db_ref.get_database t) in let msg_gen, messages = if Subscription.table_matches subs "message" then @@ -603,10 +602,8 @@ let from_inner __context session subs from from_t timer batching = (0L, []) in let events = - let initial = - {creates= []; mods= []; deletes= []; last= !last_generation} - in - let folder = collect_events subs tableset last_generation in + let initial = {creates= []; mods= []; deletes= []; last= since} in + let folder = collect_events subs tableset since in List.fold_left folder initial tables in (msg_gen, messages, tableset, events) @@ -615,9 +612,9 @@ let from_inner __context session subs from from_t timer batching = let msg_gen, messages, tableset, events = with_call session subs (fun sub -> let grab_nonempty_range = - Throttle.Batching.with_recursive_loop batching @@ fun self arg -> + Throttle.Batching.with_recursive_loop batching @@ fun self since -> let result = - Db_lock.with_lock (fun () -> grab_range (Db_backend.make ())) + Db_lock.with_lock (fun () -> grab_range ~since (Db_backend.make ())) in let msg_gen, messages, _tables, events = result in let {creates; mods; deletes; last} = events in @@ -628,21 +625,22 @@ let from_inner __context session subs from from_t timer batching = && messages = [] && not (Clock.Timer.has_expired timer) then ( - last_generation := last ; - (* Cur_id was bumped, but nothing relevent fell out of the db. Therefore the *) + (* cur_id was bumped, but nothing relevent fell out of the database. + Therefore the last ID the client got is equivalent to the current one. *) sub.cur_id <- last ; - (* last id the client got is equivalent to the current one *) last_msg_gen := msg_gen ; wait2 sub last timer ; - (self [@tailcall]) arg + (* The next iteration will fold over events starting after + the last database event that matched a subscription. 
*) + let next = last in + (self [@tailcall]) next ) else result in - grab_nonempty_range () + grab_nonempty_range from ) in let {creates; mods; deletes; last} = events in - last_generation := last ; let event_of op ?snapshot (table, objref, time) = { id= Int64.to_string time From 7b5c28f7d340a9c60b6f2d57e88b98ac9ad09db9 Mon Sep 17 00:00:00 2001 From: Colin James Date: Fri, 14 Feb 2025 12:45:58 +0000 Subject: [PATCH 057/117] Factor out event reification In order to provide event records to subscribers, we must convert the accumulated events of the form (table, objref, time) into event records. The process of doing this is simple for objects in the database. The only difference is that deletion events do not provide a snapshot (as the object has been deleted). To avoid repeating ourselves, we define an "events_of" function that accumulates event records. The function takes an argument that specifies whether an attempt to provide a snapshot should be performed. The reification of events associated with messages - which are not stored in the database - is untouched. This relies on a callback instated elsewhere. 
Signed-off-by: Colin James --- ocaml/xapi/xapi_event.ml | 49 +++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/ocaml/xapi/xapi_event.ml b/ocaml/xapi/xapi_event.ml index 233abcd1eb9..ca059070d00 100644 --- a/ocaml/xapi/xapi_event.ml +++ b/ocaml/xapi/xapi_event.ml @@ -651,39 +651,32 @@ let from_inner __context session subs from from_t timer batching = ; snapshot } in - let events = - List.fold_left - (fun acc x -> - let ev = event_of `del x in - if Subscription.event_matches subs ev then ev :: acc else acc - ) - [] deletes - in - let events = - List.fold_left - (fun acc (table, objref, mtime) -> + let events_of ~kind ?(with_snapshot = true) entries acc = + let rec go events ((table, obj, _time) as entry) = + let snapshot = let serialiser = Eventgen.find_get_record table in - try - let xml = serialiser ~__context ~self:objref () in - let ev = event_of `_mod ?snapshot:xml (table, objref, mtime) in - if Subscription.event_matches subs ev then ev :: acc else acc - with _ -> acc - ) - events mods + if with_snapshot then + serialiser ~__context ~self:obj () + else + None + in + let event = event_of kind ?snapshot entry in + if Subscription.event_matches subs event then + event :: events + else + events + in + List.fold_left go acc entries in let events = - List.fold_left - (fun acc (table, objref, ctime) -> - let serialiser = Eventgen.find_get_record table in - try - let xml = serialiser ~__context ~self:objref () in - let ev = event_of `add ?snapshot:xml (table, objref, ctime) in - if Subscription.event_matches subs ev then ev :: acc else acc - with _ -> acc - ) - events creates + [] (* Accumulate the events for objects stored in the database. *) + |> events_of ~kind:`del ~with_snapshot:false deletes + |> events_of ~kind:`_mod mods + |> events_of ~kind:`add creates in let events = + (* Messages require a special casing as their contents are not + stored in the database. 
*) List.fold_left (fun acc mev -> let event = From 9e60229f508b75b0e7dcb5e95c35857466cd9991 Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Fri, 14 Feb 2025 12:00:00 +0100 Subject: [PATCH 058/117] (docs) VM.migrate.md: Rephrase and simplify, improve readability Signed-off-by: Bernhard Kaindl --- .../xenopsd/walkthroughs/VM.migrate.md | 178 ++++++++++-------- doc/content/xenopsd/walkthroughs/VM.start.md | 20 +- .../xenopsd/walkthroughs/live-migration.md | 8 +- 3 files changed, 110 insertions(+), 96 deletions(-) diff --git a/doc/content/xenopsd/walkthroughs/VM.migrate.md b/doc/content/xenopsd/walkthroughs/VM.migrate.md index 572bed526cb..8982c4690da 100644 --- a/doc/content/xenopsd/walkthroughs/VM.migrate.md +++ b/doc/content/xenopsd/walkthroughs/VM.migrate.md @@ -3,38 +3,42 @@ title: 'Walkthrough: Migrating a VM' linktitle: 'Migrating a VM' description: Walkthrough of migrating a VM from one host to another. weight: 50 +mermaid: + force: true --- +At the end of this walkthrough, a sequence diagram of the overall process is included. -A XenAPI client wishes to migrate a VM from one host to another within -the same pool. +## Invocation -The client will issue a command to migrate the VM and it will be dispatched +The command to migrate the VM is dispatched by the autogenerated `dispatch_call` function from **xapi/server.ml**. For more information about the generated functions you can have a look to [XAPI IDL model](https://github.com/xapi-project/xen-api/tree/master/ocaml/idl/ocaml_backend). -The command will trigger the operation +The command triggers the operation [VM_migrate](https://github.com/xapi-project/xen-api/blob/7ac88b90e762065c5ebb94a8ea61c61bdbf62c5c/ocaml/xenopsd/lib/xenops_server.ml#L2572) -that has low level operations performed by the backend. 
These atomics operations -that we will describe in the documentation are: - -- VM.restore -- VM.rename -- VBD.set_active -- VBD.plug -- VIF.set_active -- VGPU.set_active -- VM.create_device_model -- PCI.plug -- VM.set_domain_action_request - -The command has several parameters such as: Should it be started asynchronously, -should it be forwarded to another host, how arguments should be marshalled and -so on. A new thread is created by [xapi/server_helpers.ml](https://github.com/xapi-project/xen-api/blob/7ac88b90e762065c5ebb94a8ea61c61bdbf62c5c/ocaml/xapi/server_helpers.ml#L55) -to handle the command asynchronously. At this point the helper also check if +that uses many low level atomics operations. These are: + +- [VM.restore](#VM-restore) +- [VM.rename](#VM-rename) +- [VBD.set_active](#restoring-devices) +- [VBD.plug](#restoring-devices) +- [VIF.set_active](#restoring-devices) +- [VGPU.set_active](#restoring-devices) +- [VM.create_device_model](#creating-the-device-model) +- [PCI.plug](#pci-plug) + +The migrate command has several parameters such as: + +- Should it be started asynchronously, +- Should it be forwarded to another host, +- How arguments should be marshalled, and so on. + +A new thread is created by [xapi/server_helpers.ml](https://github.com/xapi-project/xen-api/blob/7ac88b90e762065c5ebb94a8ea61c61bdbf62c5c/ocaml/xapi/server_helpers.ml#L55) +to handle the command asynchronously. The helper thread checks if the command should be passed to the [message forwarding](https://github.com/xapi-project/xen-api/blob/master/ocaml/xapi/message_forwarding.ml) -layer in order to be executed on another host (the destination) or locally if -we are already at the right place. +layer in order to be executed on another host (the destination) or locally (if +it is already at the destination host). 
It will finally reach [xapi/api_server.ml](https://github.com/xapi-project/xen-api/blob/7ac88b90e762065c5ebb94a8ea61c61bdbf62c5c/ocaml/xapi/api_server.ml#L242) that will take the action of posted a command to the message broker [message switch](https://github.com/xapi-project/xen-api/tree/master/ocaml/message-switch). @@ -43,34 +47,38 @@ XAPI daemons. In the case of the migration this message sends by **XAPI** will b consumed by the [xenopsd](https://github.com/xapi-project/xen-api/tree/master/ocaml/xenopsd) daemon that will do the job of migrating the VM. -# The migration of the VM +## Overview The migration is an asynchronous task and a thread is created to handle this task. -The tasks's reference is returned to the client, which can then check +The task reference is returned to the client, which can then check its status until completion. -As we see in the introduction the [xenopsd](https://github.com/xapi-project/xen-api/tree/master/ocaml/xenopsd) -daemon will pop the operation +As shown in the introduction, [xenopsd](https://github.com/xapi-project/xen-api/tree/master/ocaml/xenopsd) +fetches the [VM_migrate](https://github.com/xapi-project/xen-api/blob/7ac88b90e762065c5ebb94a8ea61c61bdbf62c5c/ocaml/xenopsd/lib/xenops_server.ml#L2572) -from the message broker. +operation from the message broker. -Only one backend is know available that interacts with libxc, libxenguest -and xenstore. It is the [xc backend](https://github.com/xapi-project/xen-api/tree/master/ocaml/xenopsd/xc). +All tasks specific to [libxenctrl](../../lib/xenctrl), +[xenguest](VM.build/xenguest) and [Xenstore](https://wiki.xenproject.org/wiki/XenStore) +are handled by the xenopsd +[xc backend](https://github.com/xapi-project/xen-api/tree/master/ocaml/xenopsd/xc). The entities that need to be migrated are: *VDI*, *VIF*, *VGPU* and *PCI* components. 
-During the migration process the destination domain will be built with the same -uuid than the original VM but the last part of the UUID will be +During the migration process, the destination domain will be built with the same +UUID as the original VM, except that the last part of the UUID will be `XXXXXXXX-XXXX-XXXX-XXXX-000000000001`. The original domain will be removed using `XXXXXXXX-XXXX-XXXX-XXXX-000000000000`. -There are some points called *hooks* at which `xenopsd` can execute some script. -Before starting a migration a command is send to the original domain to execute -a pre migrate script if it exists. +## Preparing VM migration -Before starting the migration a command is sent to Qemu using the Qemu Machine Protocol (QMP) +At specific places, `xenopsd` can execute *hooks* to run scripts. +In case a pre-migrate script is in place, a command to run this script +is sent to the original domain. + +Likewise, a command is sent to Qemu using the Qemu Machine Protocol (QMP) to check that the domain can be suspended (see [xenopsd/xc/device_common.ml](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/device_common.ml)). -After checking with Qemu that the VM is suspendable we can start the migration. +After checking with Qemu that the VM is can be suspended, the migration can begin. ## Importing metadata @@ -82,38 +90,34 @@ Once imported, it will give us a reference id and will allow building the new do on the destination using the temporary VM uuid `XXXXXXXX-XXXX-XXXX-XXXX-000000000001` where `XXX...` is the reference id of the original VM. -## Setting memory +## Memory setup -One of the first thing to do is to set up the memory. The backend will check that there -is no ballooning operation in progress. At this point the migration can fail if a -ballooning operation is in progress and takes too much time. +One of the first steps the setup of the VM's memory: The backend checks that there +is no ballooning operation in progress. 
If so, the migration could fail. Once memory has been checked, the daemon will get the state of the VM (running, halted, ...) and -information about the VM is retrieved by the backend like the maximum memory the domain -can consume but also information about quotas for example. -The backend retrieves this information from the Xenstore. +The backend retrieves the domain's platform data (memory, vCPUs setc) from the Xenstore. Once this is complete, we can restore VIF and create the domain. The synchronisation of the memory is the first point of synchronisation and everything is ready for VM migration. -## VM Migration +## Destination VM setup After receiving memory we can set up the destination domain. If we have a vGPU we need to kick -off its migration process. We will need to wait the acknowledge that indicates that the entry -for the GPU has been well initialized. before starting the main VM migration. +off its migration process. We will need to wait for the acknowledgement that the +GPU entry has been successfully initialized before starting the main VM migration. -Their is a mechanism of handshake for synchronizing between the source and the -destination. Using the handshake protocol the receiver inform the sender of the -request that everything has been setup and ready to save/restore. +The receiver informs the sender using a handshake protocol +that everything is set up and ready for save/restore. -### VM restore +## Destination VM restore VM restore is a low level atomic operation [VM.restore](https://github.com/xapi-project/xen-api/blob/7ac88b90e762065c5ebb94a8ea61c61bdbf62c5c/ocaml/xenopsd/xc/xenops_server_xen.ml#L2684). This operation is represented by a function call to [backend](https://github.com/xapi-project/xen-api/blob/7ac88b90e762065c5ebb94a8ea61c61bdbf62c5c/ocaml/xenopsd/xc/domain.ml#L1540). 
It uses **Xenguest**, a low-level utility from XAPI toolstack, to interact with the Xen hypervisor -and libxc for sending a request of migration to the **emu-manager**. +and `libxc` for sending a migration request to the **emu-manager**. After sending the request results coming from **emu-manager** are collected by the main thread. It blocks until results are received. @@ -123,16 +127,14 @@ transitions for the devices and handling the message passing for the VM as it's moved between hosts. This includes making sure that the state of the VM's virtual devices, like disks or network interfaces, is correctly moved over. -### VM renaming +## Destination VM rename -Once all operations are done we can rename the VM on the target from its temporary -name to its real UUID. This operation is another low level atomic one +Once all operations are done, `xenopsd` renames the target VM from its temporary +name to its real UUID. This operation is a low-level atomic [VM.rename](https://github.com/xapi-project/xen-api/blob/7ac88b90e762065c5ebb94a8ea61c61bdbf62c5c/ocaml/xenopsd/xc/xenops_server_xen.ml#L1667) -that will take care of updating the xenstore on the destination. - -The next step is the restauration of devices and unpause the domain. +which takes care of updating the Xenstore on the destination host. -### Restoring remaining devices +## Restoring devices Restoring devices starts by activating VBD using the low level atomic operation [VBD.set_active](https://github.com/xapi-project/xen-api/blob/7ac88b90e762065c5ebb94a8ea61c61bdbf62c5c/ocaml/xenopsd/xc/xenops_server_xen.ml#L3674). It is an update of Xenstore. VBDs that are read-write must @@ -143,39 +145,51 @@ is called. VDI are attached and activate. 
Next devices are VIFs that are set as active [VIF.set_active](https://github.com/xapi-project/xen-api/blob/7ac88b90e762065c5ebb94a8ea61c61bdbf62c5c/ocaml/xenopsd/xc/xenops_server_xen.ml#L4296) and plug [VIF.plug](https://github.com/xapi-project/xen-api/blob/7ac88b90e762065c5ebb94a8ea61c61bdbf62c5c/ocaml/xenopsd/xc/xenops_server_xen.ml#L4394). If there are VGPUs we will set them as active now using the atomic [VGPU.set_active](https://github.com/xapi-project/xen-api/blob/7ac88b90e762065c5ebb94a8ea61c61bdbf62c5c/ocaml/xenopsd/xc/xenops_server_xen.ml#L3490). -We are almost done. The next step is to create the device model - -#### create device model +### Creating the device model -Create device model is done by using the atomic operation [VM.create_device_model](https://github.com/xapi-project/xen-api/blob/7ac88b90e762065c5ebb94a8ea61c61bdbf62c5c/ocaml/xenopsd/xc/xenops_server_xen.ml#L2375). This -will configure **qemu-dm** and started. This allows to manage PCI devices. +[create_device_model](https://github.com/xapi-project/xen-api/blob/ec3b62ee/ocaml/xenopsd/xc/xenops_server_xen.ml#L2293-L2349) +configures **qemu-dm** and starts it. This allows to manage PCI devices. -#### PCI plug +### PCI plug [PCI.plug](https://github.com/xapi-project/xen-api/blob/7ac88b90e762065c5ebb94a8ea61c61bdbf62c5c/ocaml/xenopsd/xc/xenops_server_xen.ml#L3399) is executed by the backend. It plugs a PCI device and advertises it to QEMU if this option is set. It is -the case for NVIDIA SR-IOV vGPUS. +the case for NVIDIA SR-IOV vGPUs. -At this point devices have been restored. The new domain is considered survivable. We can -unpause the domain and performs last actions +## Unpause -### Unpause and done +The libxenctrl call +[xc_domain_unpause()](https://github.com/xen-project/xen/blob/414dde3/tools/libs/ctrl/xc_domain.c#L76) +unpauses the domain, and it starts running. 
-Unpause is done by managing the state of the domain using bindings to [xenctrl](https://xenbits.xen.org/gitweb/?p=xen.git;a=blob;f=tools/libs/ctrl/xc_domain.c;h=f2d9d14b4d9f24553fa766c5dcb289f88d684bb0;hb=HEAD#l76). -Once hypervisor has unpaused the domain some actions can be requested using [VM.set_domain_action_request](https://github.com/xapi-project/xen-api/blob/7ac88b90e762065c5ebb94a8ea61c61bdbf62c5c/ocaml/xenopsd/xc/xenops_server_xen.ml#L3172). -It is a path in xenstore. By default no action is done but a reboot can be for example -initiated. +## Cleanup -Previously we spoke about some points called *hooks* at which `xenopsd` can execute some script. There -is also a hook to run a post migrate script. After the execution of the script if there is one -the migration is almost done. The last step is a handshake to seal the success of the migration +1. [VM_set_domain_action_request](https://github.com/xapi-project/xen-api/blob/ec3b62ee/ocaml/xenopsd/lib/xenops_server.ml#L3004) + marks the domain as alive: In case `xenopsd` restarts, it no longer reboots the VM. + See the chapter on [marking domains as alive](VM.start#11-mark-the-domain-as-alive) + for more information. + +2. If a post-migrate script is in place, it is executed by the + [Xenops_hooks.VM_post_migrate](https://github.com/xapi-project/xen-api/blob/ec3b62ee/ocaml/xenopsd/lib/xenops_server.ml#L3005-L3009) + hook. + +3. The final step is a handshake to seal the success of the migration and the old VM can now be cleaned up. -# Links +[Syncronisation point 4](https://github.com/xapi-project/xen-api/blob/ec3b62ee/ocaml/xenopsd/lib/xenops_server.ml#L3014) +has been reached, the migration is complete. + +## Live migration flowchart + +This flowchart gives a visual representation of the VM migration workflow: + +{{% include live-migration %}} + +## References -Some links are old but even if many changes occurred, they are relevant for a global understanding -of the XAPI toolstack. 
+These pages might help for a better understanding of the XAPI toolstack: -- [XAPI architecture](https://xapi-project.github.io/xapi/architecture.html) -- [XAPI dispatcher](https://wiki.xenproject.org/wiki/XAPI_Dispatch) -- [Xenopsd architecture](https://xapi-project.github.io/xenopsd/architecture.html) +- See the [XAPI architecture](../../xapi/_index) for the overall architecture of Xapi +- See the [XAPI dispatcher](https://wiki.xenproject.org/wiki/XAPI_Dispatch) for service dispatch and message forwarding +- See the [Xenopsd architecture](../architecture/_index) for the overall architecture of Xenopsd +- See the [How Xen suspend and resume works](https://mirage.io/docs/xen-suspend) for very similar operations in more detail. diff --git a/doc/content/xenopsd/walkthroughs/VM.start.md b/doc/content/xenopsd/walkthroughs/VM.start.md index eb16571d35b..b043a5d9bf0 100644 --- a/doc/content/xenopsd/walkthroughs/VM.start.md +++ b/doc/content/xenopsd/walkthroughs/VM.start.md @@ -135,17 +135,15 @@ When the Task has completed successfully, then calls to *.stat will show: - a valid start time - valid "targets" for memory and vCPU -Note: before a Task completes, calls to *.stat will show partial updates e.g. -the power state may be Paused but none of the disks may have become plugged. +Note: before a Task completes, calls to *.stat will show partial updates. E.g. +the power state may be paused, but no disk may have been plugged. UI clients must choose whether they are happy displaying this in-between state or whether they wish to hide it and pretend the whole operation has happened -transactionally. If a particular client wishes to perform side-effects in -response to Xenopsd state changes -- for example to clean up an external resource -when a VIF becomes unplugged -- then it must be very careful to avoid responding -to these in-between states. Generally it is safest to passively report these -values without driving things directly from them. 
Think of them as status lights -on the front panel of a PC: fine to look at but it's not a good idea to wire -them up to actuators which actually do things. +transactionally. If a particular, when a client wishes to perform side-effects in +response to `xenopsd` state changes (for example, to clean up an external resource +when a VIF becomes unplugged), it must be very careful to avoid responding +to these in-between states. Generally, it is safest to passively report these +values without driving things directly from them. Note: the Xenopsd implementation guarantees that, if it is restarted at any point during the start operation, on restart the VM state shall be "fixed" by either @@ -304,7 +302,7 @@ calls bracket plug/unplug. If the "active" flag was set before the unplug attempt then as soon as the frontend/backend connection is removed clients would see the VBD as completely dissociated from the VM -- this would be misleading because Xenopsd will not have had time to use the storage API to release locks -on the disks. By doing all the cleanup before setting "active" to false, clients +on the disks. By cleaning up before setting "active" to false, clients can be assured that the disks are now free to be reassigned. ## 5. handle non-persistent disks @@ -370,7 +368,7 @@ to be the order the nodes were created so this means that (i) xenstored must continue to store directories as ordered lists rather than maps (which would be more efficient); and (ii) Xenopsd must make sure to plug the vifs in the same order. Note that relying on ethX device numbering has always been a -bad idea but is still common. I bet if you change this lots of tests will +bad idea but is still common. I bet if you change this, many tests will suddenly start to fail! 
The function diff --git a/doc/content/xenopsd/walkthroughs/live-migration.md b/doc/content/xenopsd/walkthroughs/live-migration.md index c6fa02d95fa..b93a4afbaa8 100644 --- a/doc/content/xenopsd/walkthroughs/live-migration.md +++ b/doc/content/xenopsd/walkthroughs/live-migration.md @@ -2,9 +2,12 @@ title = "Live Migration Sequence Diagram" linkTitle = "Live Migration" description = "Sequence diagram of the process of Live Migration." +# Note: This page is included by VM.migrate.md to provide a complete overview +# of the most important parts of live migration. Do not add text as that would +# break the mermaid diagram inclusion. +++ -{{}} +```mermaid sequenceDiagram autonumber participant tx as sender @@ -44,5 +47,4 @@ deactivate rx1 tx->>tx: VM_shutdown
VM_remove deactivate tx - -{{< /mermaid >}} +``` From 6d0fef3a2f7cfec99fc081422a470962c71366c5 Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Fri, 14 Feb 2025 12:00:00 +0100 Subject: [PATCH 059/117] (docs) Improve two page titles to improve search results Signed-off-by: Bernhard Kaindl --- doc/content/squeezed/architecture/index.md | 5 +++-- doc/content/xenopsd/architecture/_index.md | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/doc/content/squeezed/architecture/index.md b/doc/content/squeezed/architecture/index.md index fb86fd69989..2f7135fe926 100644 --- a/doc/content/squeezed/architecture/index.md +++ b/doc/content/squeezed/architecture/index.md @@ -1,8 +1,9 @@ +++ -title = "Architecture" +title = "Squeezed Architecture" +linkTitle = "Architecture" +++ -Squeezed is responsible for managing the memory on a single host. Squeezed +Squeezed is the XAPI Toolstack’s host memory ballooning daemon. It "balances" memory between VMs according to a policy written to Xenstore. The following diagram shows the internals of Squeezed: diff --git a/doc/content/xenopsd/architecture/_index.md b/doc/content/xenopsd/architecture/_index.md index 0f4d5eccea5..8211e838684 100644 --- a/doc/content/xenopsd/architecture/_index.md +++ b/doc/content/xenopsd/architecture/_index.md @@ -1,5 +1,6 @@ +++ -title = "Architecture" +title = "Xenopsd Architecture" +linkTitle = "Architecture" +++ Xenopsd instances run on a host and manage VMs on behalf of clients. This From 5ea74d9f91982a86574ff8bc92a9684857cc3a43 Mon Sep 17 00:00:00 2001 From: Colin James Date: Mon, 17 Feb 2025 09:40:24 +0000 Subject: [PATCH 060/117] Use record type for individual event entries Further changes to turn tuples into records. Also partially uncurries `collect_events` to make its intended use as a fold more apparent. 
Signed-off-by: Colin James --- ocaml/xapi/xapi_event.ml | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/ocaml/xapi/xapi_event.ml b/ocaml/xapi/xapi_event.ml index ca059070d00..4c65cb97b65 100644 --- a/ocaml/xapi/xapi_event.ml +++ b/ocaml/xapi/xapi_event.ml @@ -525,16 +525,18 @@ let rec next ~__context = else rpc_of_events relevant -type entry = string * string * Xapi_database.Db_cache_types.Time.t +type time = Xapi_database.Db_cache_types.Time.t + +type entry = {table: string; obj: string; time: time} type acc = { creates: entry list ; mods: entry list ; deletes: entry list - ; last: Xapi_database.Db_cache_types.Time.t + ; last: time } -let collect_events subs tables last_generation acc table = +let collect_events (subs, tables, last_generation) acc table = let open Xapi_database in let open Db_cache_types in let table_value = TableSet.find table tables in @@ -544,13 +546,13 @@ let collect_events subs tables last_generation acc table = let last = max last (max modified deleted) in let creates = if created > last_generation then - (table, obj, created) :: creates + {table; obj; time= created} :: creates else creates in let mods = if modified > last_generation && not (created > last_generation) then - (table, obj, modified) :: mods + {table; obj; time= modified} :: mods else mods in @@ -564,7 +566,7 @@ let collect_events subs tables last_generation acc table = let last = max last (max modified deleted) in let deletes = if created <= last_generation then - (table, obj, deleted) :: deletes + {table; obj; time= deleted} :: deletes else deletes in @@ -603,7 +605,7 @@ let from_inner __context session subs from from_t timer batching = in let events = let initial = {creates= []; mods= []; deletes= []; last= since} in - let folder = collect_events subs tableset since in + let folder = collect_events (subs, tableset, since) in List.fold_left folder initial tables in (msg_gen, messages, tableset, events) @@ -641,18 +643,18 @@ let 
from_inner __context session subs from from_t timer batching = ) in let {creates; mods; deletes; last} = events in - let event_of op ?snapshot (table, objref, time) = + let event_of op ?snapshot {table; obj; time} = { id= Int64.to_string time ; ts= "0.0" ; ty= String.lowercase_ascii table ; op - ; reference= objref + ; reference= obj ; snapshot } in let events_of ~kind ?(with_snapshot = true) entries acc = - let rec go events ((table, obj, _time) as entry) = + let rec go events ({table; obj; time= _} as entry) = let snapshot = let serialiser = Eventgen.find_get_record table in if with_snapshot then @@ -680,13 +682,14 @@ let from_inner __context session subs from from_t timer batching = List.fold_left (fun acc mev -> let event = + let table = "message" in match mev with | Message.Create (_ref, message) -> event_of `add ?snapshot:(Some (API.rpc_of_message_t message)) - ("message", Ref.string_of _ref, 0L) + {table; obj= Ref.string_of _ref; time= 0L} | Message.Del _ref -> - event_of `del ("message", Ref.string_of _ref, 0L) + event_of `del {table; obj= Ref.string_of _ref; time= 0L} in event :: acc ) From 801252a346dac38213b21cb53c67002e9f5125c8 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Tue, 18 Feb 2025 21:21:34 +0800 Subject: [PATCH 061/117] Update datamodel_lifecycle for easier pool join. 
Signed-off-by: Gang Ji --- ocaml/idl/datamodel_lifecycle.ml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index 0023f8dead7..d2f55d42dde 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -63,6 +63,8 @@ let prototyped_of_field = function Some "23.14.0" | "Observer", "uuid" -> Some "23.14.0" + | "Repository", "certificate" -> + Some "25.7.0" | "Repository", "origin" -> Some "24.23.0" | "Repository", "gpgkey_path" -> @@ -120,7 +122,7 @@ let prototyped_of_field = function | "VM", "actions__after_softreboot" -> Some "23.1.0" | "pool", "license_server" -> - Some "25.5.0-next" + Some "25.6.0" | "pool", "recommendations" -> Some "24.19.1" | "pool", "update_sync_enabled" -> @@ -183,6 +185,8 @@ let prototyped_of_message = function Some "22.20.0" | "Repository", "set_gpgkey_path" -> Some "22.12.0" + | "Repository", "introduce_remote_pool" -> + Some "25.7.0" | "Repository", "introduce_bundle" -> Some "24.23.0" | "PCI", "get_dom0_access_status" -> From 56a06ad526f7f2f57822cc17075461c6580ac014 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Mon, 27 Jan 2025 14:29:17 +0800 Subject: [PATCH 062/117] Code refine. Signed-off-by: Gang Ji --- ocaml/xapi/xapi_pool.ml | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 11818befce5..0bf5c9e8b4d 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -52,8 +52,11 @@ let get_pool ~rpc ~session_id = | [] -> let err_msg = "Remote host does not belong to a pool." in raise Api_errors.(Server_error (internal_error, [err_msg])) - | pool :: _ -> + | [pool] -> pool + | _ -> + let err_msg = "Should get only one pool." 
in + raise Api_errors.(Server_error (internal_error, [err_msg])) let get_master ~rpc ~session_id = let pool = get_pool ~rpc ~session_id in @@ -63,16 +66,7 @@ let get_master ~rpc ~session_id = let pre_join_checks ~__context ~rpc ~session_id ~force = (* I cannot join a Pool unless my management interface exists in the db, otherwise Pool.eject will fail to rewrite network interface files. *) - let remote_pool = - match Client.Pool.get_all ~rpc ~session_id with - | [pool] -> - pool - | _ -> - raise - Api_errors.( - Server_error (internal_error, ["Should get only one pool"]) - ) - in + let remote_pool = get_pool ~rpc ~session_id in let assert_management_interface_exists () = try let (_ : API.ref_PIF) = @@ -725,7 +719,6 @@ let pre_join_checks ~__context ~rpc ~session_id ~force = ) in let assert_tls_verification_matches () = - let remote_pool = get_pool ~rpc ~session_id in let joiner_pool = Helpers.get_pool ~__context in let tls_enabled_pool = Client.Pool.get_tls_verification_enabled ~rpc ~session_id From 0ce80436113aaa54715b9a07262030752de55f78 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Wed, 12 Feb 2025 01:52:58 +0800 Subject: [PATCH 063/117] Add Helpers.internal_error Signed-off-by: Gang Ji --- ocaml/tests/test_clustering.ml | 12 +- ocaml/xapi/cert_distrib.ml | 11 +- ocaml/xapi/certificates_sync.ml | 14 +- ocaml/xapi/dbsync_slave.ml | 9 +- ocaml/xapi/helpers.ml | 69 ++----- ocaml/xapi/livepatch.ml | 15 +- ocaml/xapi/message_forwarding.ml | 11 +- ocaml/xapi/nm.ml | 12 +- ocaml/xapi/repository.ml | 12 +- ocaml/xapi/repository_helpers.ml | 38 ++-- ocaml/xapi/rpm.ml | 3 +- ocaml/xapi/vgpuops.ml | 13 +- ocaml/xapi/vhd_tool_wrapper.ml | 15 +- ocaml/xapi/wlb_reports.ml | 6 +- ocaml/xapi/xapi_cluster_host.ml | 35 +--- ocaml/xapi/xapi_clustering.ml | 43 ++--- ocaml/xapi/xapi_dr_task.ml | 7 +- ocaml/xapi/xapi_gpumon.ml | 11 +- ocaml/xapi/xapi_ha_vm_failover.ml | 4 +- ocaml/xapi/xapi_host.ml | 72 ++------ ocaml/xapi/xapi_host_driver_tool.ml | 22 +-- 
ocaml/xapi/xapi_host_helpers.ml | 14 +- ocaml/xapi/xapi_network_sriov_helpers.ml | 56 +----- ocaml/xapi/xapi_pci.ml | 8 +- ocaml/xapi/xapi_pgpu.ml | 14 +- ocaml/xapi/xapi_pif.ml | 25 +-- ocaml/xapi/xapi_pif_helpers.ml | 13 +- ocaml/xapi/xapi_pool.ml | 17 +- ocaml/xapi/xapi_pool_helpers.ml | 16 +- ocaml/xapi/xapi_pool_patch.ml | 2 +- ocaml/xapi/xapi_pool_update.ml | 2 +- ocaml/xapi/xapi_psr.ml | 121 +++--------- ocaml/xapi/xapi_pusb.ml | 12 +- ocaml/xapi/xapi_pusb_helpers.ml | 8 +- ocaml/xapi/xapi_session.ml | 8 +- ocaml/xapi/xapi_sr.ml | 17 +- ocaml/xapi/xapi_sr_operations.ml | 13 +- ocaml/xapi/xapi_templates.ml | 8 +- ocaml/xapi/xapi_vbd_helpers.ml | 13 +- ocaml/xapi/xapi_vif_helpers.ml | 13 +- ocaml/xapi/xapi_vm.ml | 18 +- ocaml/xapi/xapi_vm_group_helpers.ml | 9 +- ocaml/xapi/xapi_vm_lifecycle.ml | 17 +- ocaml/xapi/xapi_vm_migrate.ml | 13 +- ocaml/xapi/xapi_vm_snapshot.ml | 7 +- ocaml/xapi/xapi_vtpm.ml | 3 +- ocaml/xapi/xapi_vusb_helpers.ml | 14 +- ocaml/xapi/xapi_xenops.ml | 222 +++++------------------ ocaml/xapi/xha_statefile.ml | 12 +- 49 files changed, 272 insertions(+), 857 deletions(-) diff --git a/ocaml/tests/test_clustering.ml b/ocaml/tests/test_clustering.ml index 05980045a11..9be97c5fdb5 100644 --- a/ocaml/tests/test_clustering.ml +++ b/ocaml/tests/test_clustering.ml @@ -165,15 +165,15 @@ let test_find_cluster_host_finds_multiple_cluster_hosts () = let host = Db.Host.get_all ~__context |> List.hd in let _ = T.make_cluster_host ~__context ~host () in let _ = T.make_cluster_host ~__context ~host () in + let err = + Printf.sprintf "Multiple cluster_hosts found for host %s %s" + (Db.Host.get_uuid ~__context ~self:host) + (Ref.string_of host) + in Alcotest.check_raises "test_find_cluster_host_finds_multiple_cluster_hosts should throw an \ internal error" - Api_errors.( - Server_error - ( internal_error - , ["Multiple cluster_hosts found for host"; Ref.string_of host] - ) - ) + Api_errors.(Server_error (internal_error, [err])) (fun () -> ignore 
(Xapi_clustering.find_cluster_host ~__context ~host)) let test_find_cluster_host = diff --git a/ocaml/xapi/cert_distrib.ml b/ocaml/xapi/cert_distrib.ml index e7624ef8aaa..e5f54435749 100644 --- a/ocaml/xapi/cert_distrib.ml +++ b/ocaml/xapi/cert_distrib.ml @@ -101,7 +101,7 @@ let raise_internal ?e ?(details = "") msg : 'a = e in [msg; details; e] |> String.concat ". " |> D.error "%s" ; - raise Api_errors.(Server_error (internal_error, [msg])) + Helpers.internal_error "%s" msg module type CertificateProvider = sig val store_path : string @@ -404,13 +404,8 @@ let exchange_certificates_in_pool ~__context = let throw_op = ( "FIST" , fun () -> - raise - Api_errors.( - Server_error - ( internal_error - , ["/tmp/fist_exchange_certificates_in_pool FIST!"] - ) - ) + Helpers.internal_error + "/tmp/fist_exchange_certificates_in_pool FIST!" ) in let ops' = insert_at rand_i throw_op ops in diff --git a/ocaml/xapi/certificates_sync.ml b/ocaml/xapi/certificates_sync.ml index e1bf42630a0..a9ae4a8b692 100644 --- a/ocaml/xapi/certificates_sync.ml +++ b/ocaml/xapi/certificates_sync.ml @@ -119,19 +119,13 @@ let update ~__context = let* () = sync ~__context ~type':`host_internal in Ok () -let internal_error fmt = - fmt - |> Printf.ksprintf @@ fun msg -> - error "%s" msg ; - raise Api_errors.(Server_error (internal_error, [msg])) - let remove_from_db ~__context cert = try Db.Certificate.destroy ~__context ~self:cert ; info "removed host certificate %s from db" (Ref.string_of cert) with e -> - internal_error "failed to remove cert %s: %s" (Ref.string_of cert) - (Printexc.to_string e) + Helpers.internal_error ~log_err:true "failed to remove cert %s: %s" + (Ref.string_of cert) (Printexc.to_string e) let path host_uuid = let prefix = !Xapi_globs.trusted_pool_certs_dir in @@ -161,5 +155,5 @@ let eject_certs_from_fs_for ~__context host = | false -> info "host %s has no certificate %s to remove" host_uuid file with e -> - internal_error "failed to remove cert %s on pool eject: %s" file - 
(Printexc.to_string e) + Helpers.internal_error ~log_err:true + "failed to remove cert %s on pool eject: %s" file (Printexc.to_string e) diff --git a/ocaml/xapi/dbsync_slave.ml b/ocaml/xapi/dbsync_slave.ml index 597cd7b5704..2c469ca2af6 100644 --- a/ocaml/xapi/dbsync_slave.ml +++ b/ocaml/xapi/dbsync_slave.ml @@ -136,12 +136,9 @@ let refresh_localhost_info ~__context info = let network_state = Scanf.sscanf script_output "Port 80 open: %B" Fun.id in Db.Host.set_https_only ~__context ~self:host ~value:network_state with _ -> - let message = - Printf.sprintf - "unexpected output from /etc/xapi.d/plugins/firewall-port: %s" - script_output - in - raise Api_errors.(Server_error (internal_error, [message])) + Helpers.internal_error + "unexpected output from /etc/xapi.d/plugins/firewall-port: %s" + script_output (*************** update database tools ******************) (** Record host memory properties in database *) diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index 04043fb0894..2ef16112053 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -34,6 +34,15 @@ module StringSet = Set.Make (String) let ( let* ) = Result.bind +let internal_error ?(log_err = false) ?(err_fun = error) fmt = + Printf.ksprintf + (fun str -> + if log_err then + err_fun "%s" str ; + raise Api_errors.(Server_error (internal_error, [str])) + ) + fmt + let log_exn_continue msg f x = try f x with e -> @@ -719,8 +728,7 @@ let check_domain_type : API.domain_type -> [`hvm | `pv_in_pvh | `pv | `pvh] = | `pvh -> `pvh | `unspecified -> - raise - Api_errors.(Server_error (internal_error, ["unspecified domain type"])) + internal_error "unspecified domain type" let domain_type ~__context ~self : [`hvm | `pv_in_pvh | `pv | `pvh] = let vm = Db.VM.get_record ~__context ~self in @@ -1498,11 +1506,8 @@ let resolve_uri_path ~root ~uri_path = | true, true -> x | _ -> - let msg = - Printf.sprintf "Failed to resolve uri path '%s' under '%s': %s" uri_path - root x - in - raise 
Api_errors.(Server_error (internal_error, [msg])) + internal_error "Failed to resolve uri path '%s' under '%s': %s" uri_path + root x let run_in_parallel ~funs ~capacity = let rec run_in_parallel' acc funs capacity = @@ -1727,42 +1732,21 @@ let rec retry_until_timeout ?(interval = 0.1) ?(timeout = 5.) doc f = let next_interval = interval *. 1.5 in let next_timeout = timeout -. interval in if next_timeout < 0. then - raise - Api_errors.( - Server_error (internal_error, [Printf.sprintf "retry %s failed" doc]) - ) ; + internal_error "retry %s failed" doc ; Thread.delay interval ; retry_until_timeout ~interval:next_interval ~timeout:next_timeout doc f let get_first_pusb ~__context usb_group = try List.hd (Db.USB_group.get_PUSBs ~__context ~self:usb_group) with _ -> - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf - "there is no PUSB associated with the USB_group: %s" - (Ref.string_of usb_group) - ] - ) - ) + internal_error "there is no PUSB associated with the USB_group: %s" + (Ref.string_of usb_group) let get_first_vusb ~__context usb_group = try List.hd (Db.USB_group.get_VUSBs ~__context ~self:usb_group) with _ -> - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf - "there is no VUSB associated with the USB_group: %s" - (Ref.string_of usb_group) - ] - ) - ) + internal_error "there is no VUSB associated with the USB_group: %s" + (Ref.string_of usb_group) let host_supports_hvm ~__context host = (* We say that a host supports HVM if any of @@ -1873,13 +1857,7 @@ end = struct let to_result ~__context ~of_rpc ~t = Context.with_tracing ~__context __FUNCTION__ @@ fun __context -> wait_for_mirror ~__context ~propagate_cancel:true ~t ; - let fail msg = - raise - Api_errors.( - Server_error - (internal_error, [Printf.sprintf "%s, %s" (Ref.string_of t) msg]) - ) - in + let fail msg = internal_error "%s, %s" (Ref.string_of t) msg in let res = match Db.Task.get_status ~__context ~self:t with | `pending -> @@ -1965,15 
+1943,8 @@ end = struct in let sufficiently_secret = String.length x > 36 in if has_valid_chars && sufficiently_secret |> not then - raise - Api_errors.( - Server_error - ( internal_error - , [ - {|expected pool secret to match the following regex '^[0-9a-f\/\-]{37,}$'|} - ] - ) - ) ; + internal_error + {|expected pool secret to match the following regex '^[0-9a-f\/\-]{37,}$'|} ; SecretString.of_string x let _make () = diff --git a/ocaml/xapi/livepatch.ml b/ocaml/xapi/livepatch.ml index 63afa9a2c82..3c010993ab2 100644 --- a/ocaml/xapi/livepatch.ml +++ b/ocaml/xapi/livepatch.ml @@ -68,7 +68,7 @@ let of_json js = (ExnHelper.string_of_exn e) msg (Yojson.Basic.pretty_to_string js) ; - raise Api_errors.(Server_error (internal_error, [msg])) + Helpers.internal_error "%s" msg let get_latest_livepatch lps = List.map (fun (_, _, t_v, t_r) -> (t_v, t_r)) lps @@ -344,14 +344,11 @@ let apply ~component ~livepatch_file ~base_build_id ~base_version ~base_release | expected_id, Some real_id when expected_id = real_id -> () | _ -> - let msg = - Printf.sprintf - "The livepatch is against build ID %s, but the build ID of the \ - running %s is %s" - expected component_str - (Option.value real ~default:"None") - in - raise Api_errors.(Server_error (internal_error, [msg])) + Helpers.internal_error + "The livepatch is against build ID %s, but the build ID of the \ + running %s is %s" + expected component_str + (Option.value real ~default:"None") in match component with | Xen -> diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index 7cad9c91a1c..5f93ff2eafc 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -3858,15 +3858,8 @@ functor in () with Forkhelpers.Spawn_internal_error (_, _, _) -> - raise - (Api_errors.Server_error - ( Api_errors.internal_error - , [ - "Generation of alerts for server certificate expiration \ - failed." 
- ] - ) - ) + Helpers.internal_error + "Generation of alerts for server certificate expiration failed." let reset_server_certificate ~__context ~host = info "Host.reset_server_certificate: host = '%s'" diff --git a/ocaml/xapi/nm.ml b/ocaml/xapi/nm.ml index abb79c7be35..229b53adbe2 100644 --- a/ocaml/xapi/nm.ml +++ b/ocaml/xapi/nm.ml @@ -441,11 +441,7 @@ let rec create_bridges ~__context pif_rc net_rc = ] ) | Network_sriov_logical _ -> - raise - Api_errors.( - Server_error - (internal_error, ["Should not create bridge for SRIOV logical PIF"]) - ) + Helpers.internal_error "Should not create bridge for SRIOV logical PIF" let rec destroy_bridges ~__context ~force pif_rc bridge = let open Xapi_pif_helpers in @@ -468,11 +464,7 @@ let rec destroy_bridges ~__context ~force pif_rc bridge = | Physical _ -> [(bridge, false)] | Network_sriov_logical _ -> - raise - Api_errors.( - Server_error - (internal_error, ["Should not destroy bridge for SRIOV logical PIF"]) - ) + Helpers.internal_error "Should not destroy bridge for SRIOV logical PIF" let determine_static_routes net_rc = if List.mem_assoc "static-routes" net_rc.API.network_other_config then diff --git a/ocaml/xapi/repository.ml b/ocaml/xapi/repository.ml index 8e43d72ecba..2195e0f1140 100644 --- a/ocaml/xapi/repository.ml +++ b/ocaml/xapi/repository.ml @@ -372,7 +372,7 @@ let parse_updateinfo ~__context ~self ~check = hash md.RepoMetaData.checksum in error "%s: %s" repo_name msg ; - raise Api_errors.(Server_error (internal_error, [msg])) + Helpers.internal_error "%s" msg ) ) ; let updateinfo_xml_gz_path = @@ -691,9 +691,8 @@ let apply_livepatch ~__context ~host:_ ~component ~base_build_id ~base_version let component' = try component_of_string component with _ -> - let msg = Printf.sprintf "Invalid component name '%s'" component in - error "%s" msg ; - raise Api_errors.(Server_error (internal_error, [msg])) + Helpers.internal_error ~log_err:true "Invalid component name '%s'" + component in match 
Livepatch.get_livepatch_file_path ~component:component' ~base_build_id @@ -703,9 +702,8 @@ let apply_livepatch ~__context ~host:_ ~component ~base_build_id ~base_version Livepatch.apply ~component:component' ~livepatch_file ~base_build_id ~base_version ~base_release ~to_version ~to_release | None -> - let msg = Printf.sprintf "No expected livepatch file for %s" component in - error "%s" msg ; - raise Api_errors.(Server_error (internal_error, [msg])) + Helpers.internal_error ~log_err:true "No expected livepatch file for %s" + component let apply_livepatches' ~__context ~host ~livepatches = List.partition_map diff --git a/ocaml/xapi/repository_helpers.ml b/ocaml/xapi/repository_helpers.ml index 12b6007f417..54f640494f0 100644 --- a/ocaml/xapi/repository_helpers.ml +++ b/ocaml/xapi/repository_helpers.ml @@ -77,7 +77,7 @@ module Update = struct with e -> let msg = "Can't construct an update from json" in error "%s: %s" msg (ExnHelper.string_of_exn e) ; - raise Api_errors.(Server_error (internal_error, [msg])) + Helpers.internal_error "%s" msg let to_string u = Printf.sprintf "%s.%s %s:%s-%s -> %s:%s-%s from %s:%s" u.name u.arch @@ -186,8 +186,7 @@ let assert_url_is_valid ~url = | [], [] -> () | valids, [] when not (hostname_allowed valids) -> - let msg = "host is not in allowlist" in - raise Api_errors.(Server_error (internal_error, [msg])) + Helpers.internal_error "host is not in allowlist" | _, [] -> () | _ -> @@ -199,10 +198,9 @@ let assert_url_is_valid ~url = ) ) | _, None -> - raise Api_errors.(Server_error (internal_error, ["invalid host in url"])) + Helpers.internal_error "invalid host in url" | _ -> - raise - Api_errors.(Server_error (internal_error, ["invalid scheme in url"])) + Helpers.internal_error "invalid scheme in url" with | Api_errors.Server_error (err, _) as e when err = Api_errors.invalid_repository_domain_allowlist -> @@ -307,15 +305,9 @@ let write_yum_config ~source_url ~binary_url ~repo_gpgcheck ~gpgkey_path Filename.concat 
!Xapi_globs.rpm_gpgkey_dir gpgkey_path in if not (Sys.file_exists gpgkey_abs_path) then - raise - Api_errors.( - Server_error (internal_error, ["gpg key file does not exist"]) - ) ; + Helpers.internal_error "gpg key file does not exist" ; if not ((Unix.lstat gpgkey_abs_path).Unix.st_kind = Unix.S_REG) then - raise - Api_errors.( - Server_error (internal_error, ["gpg key file is not a regular file"]) - ) ; + Helpers.internal_error "gpg key file is not a regular file" ; Printf.sprintf "gpgkey=file://%s" gpgkey_abs_path in match (!Xapi_globs.repository_gpgcheck, repo_gpgcheck) with @@ -374,10 +366,8 @@ let get_repo_config repo_name config_name = | [x] -> x | _ -> - let msg = - Printf.sprintf "Not found %s for repository %s" config_name repo_name - in - raise Api_errors.(Server_error (internal_error, [msg])) + Helpers.internal_error "Not found %s for repository %s" config_name + repo_name let get_enabled_repositories ~__context = let pool = Helpers.get_pool ~__context in @@ -950,14 +940,14 @@ let get_latest_updates_from_redundancy ~fail_on_error ~pkgs ~fallback_pkgs = debug "Use 'yum upgrade (dry run)'" ; pkgs | true, false -> - raise Api_errors.(Server_error (internal_error, [err])) + Helpers.internal_error "%s" err | false, false -> (* falling back *) warn "%s" err ; fallback_pkgs in match (fail_on_error, pkgs) with | true, None -> - raise Api_errors.(Server_error (internal_error, [err])) + Helpers.internal_error "%s" err | false, None -> (* falling back *) warn "%s" err ; fallback_pkgs @@ -1122,7 +1112,7 @@ let get_update_in_json ~installed_pkgs (new_pkg, update_id, repo) = | None -> let msg = "Found update from unmanaged repository" in error "%s: %s" msg repo ; - raise Api_errors.(Server_error (internal_error, [msg])) + Helpers.internal_error "%s" msg let merge_updates ~repository_name ~updates = let accumulative_updates = @@ -1534,11 +1524,7 @@ let get_ops_of_pending ~__context ~host ~kind = in {host_get; host_add; host_remove; vms_get; vm_add; vm_remove} | 
Guidance.Livepatch -> - raise - Api_errors.( - Server_error - (internal_error, ["No pending operations for Livepatch guidance"]) - ) + Helpers.internal_error "No pending operations for Livepatch guidance" let set_pending_guidances ~ops ~coming = let pending_of_host = diff --git a/ocaml/xapi/rpm.ml b/ocaml/xapi/rpm.ml index c9823170ae6..09a49c32122 100644 --- a/ocaml/xapi/rpm.ml +++ b/ocaml/xapi/rpm.ml @@ -93,8 +93,7 @@ module Pkg = struct (e, v, r) | Error _ -> let msg = error_msg epoch_ver_rel in - error "%s" msg ; - raise Api_errors.(Server_error (internal_error, [msg])) + Helpers.internal_error ~log_err:true "%s" msg let of_fullname s = (* The s likes, I.E. diff --git a/ocaml/xapi/vgpuops.ml b/ocaml/xapi/vgpuops.ml index 284916182ce..e47b0896ab3 100644 --- a/ocaml/xapi/vgpuops.ml +++ b/ocaml/xapi/vgpuops.ml @@ -137,19 +137,14 @@ let reserve_free_virtual_function ~__context vm impl pf = | _other -> let vm_ref = Ref.string_of vm in let pf_ref = Ref.string_of pf in - let msg = - Printf.sprintf "Unexpected GPU implementation vm=%s pf=%s (%s)" - vm_ref pf_ref __LOC__ - in - raise Api_errors.(Server_error (internal_error, [msg])) + Helpers.internal_error + "Unexpected GPU implementation vm=%s pf=%s (%s)" vm_ref pf_ref + __LOC__ ) ; get false | None -> (* This probably means that our capacity checking went wrong! 
*) - raise - Api_errors.( - Server_error (internal_error, ["No free virtual function found"]) - ) + Helpers.internal_error "No free virtual function found" in match impl with | `nvidia_sriov -> diff --git a/ocaml/xapi/vhd_tool_wrapper.ml b/ocaml/xapi/vhd_tool_wrapper.ml index ee1151febb5..73f25785eb8 100644 --- a/ocaml/xapi/vhd_tool_wrapper.ml +++ b/ocaml/xapi/vhd_tool_wrapper.ml @@ -138,18 +138,9 @@ let find_backend_device path = match String.split '/' backend with | "local" :: "domain" :: bedomid :: _ -> if not (self = bedomid) then - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf - "find_backend_device: Got domid %s but expected \ - %s" - bedomid self - ] - ) - ) ; + Helpers.internal_error + "find_backend_device: Got domid %s but expected %s" bedomid + self ; Some params | _ -> raise Not_found diff --git a/ocaml/xapi/wlb_reports.ml b/ocaml/xapi/wlb_reports.ml index baad7f6b35b..e466bf959f5 100644 --- a/ocaml/xapi/wlb_reports.ml +++ b/ocaml/xapi/wlb_reports.ml @@ -162,9 +162,9 @@ let handle req fd _method_name tag (method_name, request_func) = | Api_errors.Server_error (_, _) as exn -> raise exn | exn -> - warn "WLB %s request failed: %s" method_name - (ExnHelper.string_of_exn exn) ; - raise (Api_errors.Server_error (Api_errors.internal_error, [])) + Helpers.internal_error ~log_err:true ~err_fun:warn + "WLB %s request failed: %s" method_name + (ExnHelper.string_of_exn exn) ) (* GET /wlb_report?session_id=&task_id=& diff --git a/ocaml/xapi/xapi_cluster_host.ml b/ocaml/xapi/xapi_cluster_host.ml index 713261931a4..1acceb0c2c5 100644 --- a/ocaml/xapi/xapi_cluster_host.ml +++ b/ocaml/xapi/xapi_cluster_host.ml @@ -174,17 +174,8 @@ let join_internal ~__context ~self = let resync_host ~__context ~host = match find_cluster_host ~__context ~host with | _ when host <> Helpers.get_localhost ~__context -> - raise - Api_errors.( - Server_error - ( internal_error - , [ - "resync_host called with remote host" - ; Ref.string_of host - ; __LOC__ - ] 
- ) - ) + Helpers.internal_error "resync_host called with remote host: %s (%s)" + (Ref.string_of host) __LOC__ | None -> () (* no clusters exist *) | Some self -> @@ -434,25 +425,13 @@ let sync_required ~__context ~host = else None | _ -> - raise - Api_errors.( - Server_error - ( internal_error - , [ - "Host cannot be associated with more than one cluster_host" - ; Ref.string_of host - ] - ) - ) + Helpers.internal_error + "Host cannot be associated with more than one cluster_host: %s" + (Ref.string_of host) ) | _ -> - raise - Api_errors.( - Server_error - ( internal_error - , ["Cannot have more than one Cluster object per pool currently"] - ) - ) + Helpers.internal_error + "Cannot have more than one Cluster object per pool currently" (* If cluster found without local cluster_host, create one in db *) let create_as_necessary ~__context ~host = diff --git a/ocaml/xapi/xapi_clustering.ml b/ocaml/xapi/xapi_clustering.ml index 4bef40e3d4d..efaac876d69 100644 --- a/ocaml/xapi/xapi_clustering.ml +++ b/ocaml/xapi/xapi_clustering.ml @@ -63,12 +63,9 @@ let pif_of_host ~__context (network : API.ref_network) (host : API.ref_host) = | [(ref, record)] -> (ref, record) | _ -> - let msg = - Printf.sprintf "No PIF found for host:%s and network:%s" - (Ref.string_of host) (Ref.string_of network) - in - debug "%s" msg ; - raise Api_errors.(Server_error (internal_error, [msg])) + Helpers.internal_error ~log_err:true + "No PIF found for host:%s and network:%s" (Ref.string_of host) + (Ref.string_of network) let ip_of_pif (ref, record) = let ip = record.API.pIF_IP in @@ -113,7 +110,7 @@ let assert_pif_attached_to ~__context ~host ~pIF = let handle_error = function | InternalError message -> - raise Api_errors.(Server_error (internal_error, [message])) + Helpers.internal_error "%s" message | Unix_error message -> failwith ("Unix Error: " ^ message) @@ -127,13 +124,8 @@ let assert_cluster_host_can_be_created ~__context ~host = | [] -> () | _ -> - raise - Api_errors.( - Server_error - ( 
internal_error - , ["Cluster host cannot be created because it already exists"] - ) - ) + Helpers.internal_error + "Cluster host cannot be created because it already exists" (** One of the cluster stacks returned by [get_required_cluster_stacks context sr_sm_type] @@ -179,9 +171,10 @@ let find_cluster_host ~__context ~host = Some ref | _ :: _ -> (* should never happen; this indicates a bug *) - let msg = "Multiple cluster_hosts found for host" in - error "%s %s" msg (Db.Host.get_uuid ~__context ~self:host) ; - raise Api_errors.(Server_error (internal_error, [msg; Ref.string_of host])) + Helpers.internal_error ~log_err:true + "Multiple cluster_hosts found for host %s %s" + (Db.Host.get_uuid ~__context ~self:host) + (Ref.string_of host) | _ -> None @@ -218,13 +211,8 @@ let assert_cluster_host_enabled ~__context ~self ~expected = xapi-clusterd daemon running on the target host *) let assert_operation_host_target_is_localhost ~__context ~host = if host <> Helpers.get_localhost ~__context then - raise - Api_errors.( - Server_error - ( internal_error - , ["A clustering operation was attempted from the wrong host"] - ) - ) + Helpers.internal_error + "A clustering operation was attempted from the wrong host" let assert_cluster_host_has_no_attached_sr_which_requires_cluster_stack ~__context ~self = @@ -279,12 +267,7 @@ module Daemon = struct ["open"; port] ; maybe_call_script ~__context !Xapi_globs.systemctl ["enable"; service] ; maybe_call_script ~__context !Xapi_globs.systemctl ["start"; service] - with _ -> - raise - Api_errors.( - Server_error - (internal_error, [Printf.sprintf "could not start %s" service]) - ) + with _ -> Helpers.internal_error "could not start %s" service ) ; Atomic.set enabled true ; debug "Cluster daemon: enabled & started" diff --git a/ocaml/xapi/xapi_dr_task.ml b/ocaml/xapi/xapi_dr_task.ml index 40a9a992c9e..de7d15e0523 100644 --- a/ocaml/xapi/xapi_dr_task.ml +++ b/ocaml/xapi/xapi_dr_task.ml @@ -124,12 +124,7 @@ let create ~__context ~_type 
~device_config ~whitelist = let sr_records = try parse_sr_probe probe_result with Failure msg -> - raise - (Api_errors.Server_error - ( Api_errors.internal_error - , [Printf.sprintf "SR probe response was malformed: %s" msg] - ) - ) + Helpers.internal_error "SR probe response was malformed: %s" msg in (* If the SR record has a UUID, make sure it's in the whitelist. *) let sr_records = diff --git a/ocaml/xapi/xapi_gpumon.ml b/ocaml/xapi/xapi_gpumon.ml index fa71fce96b5..1502ad44e20 100644 --- a/ocaml/xapi/xapi_gpumon.ml +++ b/ocaml/xapi/xapi_gpumon.ml @@ -149,8 +149,7 @@ module Nvidia = struct let host = Helpers.get_localhost ~__context |> Ref.string_of in raise Api_errors.(Server_error (nvidia_tools_error, [host])) | err -> - let msg = Printexc.to_string err in - raise Api_errors.(Server_error (internal_error, [msg])) + Helpers.internal_error "%s" (Printexc.to_string err) (* N.B. the vgpu (and the vm) must be in the local host where this function runs *) let assert_pgpu_is_compatible_with_vm ~__context ~vm ~vgpu ~dest_host @@ -181,10 +180,7 @@ module Nvidia = struct Server_error (nvidia_tools_error, [Ref.string_of host]) ) | err -> - raise - Api_errors.( - Server_error (internal_error, [Printexc.to_string err]) - ) + Helpers.internal_error "%s" (Printexc.to_string err) in match compatibility with | Gpumon_interface.Compatible -> @@ -265,8 +261,7 @@ module Nvidia = struct | exception Gpumon_interface.(Gpumon_error NvmlInterfaceNotAvailable) -> raise Api_errors.(Server_error (nvidia_tools_error, [localhost'])) | exception err -> - raise - Api_errors.(Server_error (internal_error, [Printexc.to_string err])) + Helpers.internal_error "%s" (Printexc.to_string err) in if is_nvidia ~__context ~vgpu then check vgpu diff --git a/ocaml/xapi/xapi_ha_vm_failover.ml b/ocaml/xapi/xapi_ha_vm_failover.ml index 322d30f7996..5cbb946b150 100644 --- a/ocaml/xapi/xapi_ha_vm_failover.ml +++ b/ocaml/xapi/xapi_ha_vm_failover.ml @@ -367,9 +367,7 @@ let rec select_host_for_anti_aff_evac_plan 
vm_size hosts_psq = ) let impossible_error_handler () = - let msg = "Data corrupted during host evacuation." in - error "%s" msg ; - raise (Api_errors.Server_error (Api_errors.internal_error, [msg])) + Helpers.internal_error ~log_err:true "Data corrupted during host evacuation." (*****************************************************************************************************) (* Planning code follows *) diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index ec61f37f0ea..95e1b600fa2 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -655,11 +655,7 @@ let evacuate ~__context ~host ~network ~evacuate_batch_size = let finally = Xapi_stdext_pervasives.Pervasiveext.finally in let destroy = Client.Client.Task.destroy in let fail task msg = - raise - Api_errors.( - Server_error - (internal_error, [Printf.sprintf "%s, %s" (Ref.string_of task) msg]) - ) + Helpers.internal_error "%s, %s" (Ref.string_of task) msg in let assert_success task = @@ -729,16 +725,8 @@ let evacuate ~__context ~host ~network ~evacuate_batch_size = in let remainder = List.length vms in if not (remainder = 0) then - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf "evacuate: %d VMs are still resident on %s" - remainder (Ref.string_of host) - ] - ) - ) + Helpers.internal_error "evacuate: %d VMs are still resident on %s" remainder + (Ref.string_of host) let retrieve_wlb_evacuate_recommendations ~__context ~self = let plans = compute_evacuation_plan_wlb ~__context ~self in @@ -1599,9 +1587,8 @@ let _new_host_cert ~dbg ~path : X509.Certificate.t = let ip_as_string, ip = match Networking_info.get_management_ip_addr ~dbg with | None -> - let msg = Printf.sprintf "%s: failed to get management IP" __LOC__ in - D.error "%s" msg ; - raise Api_errors.(Server_error (internal_error, [msg])) + Helpers.internal_error ~log_err:true ~err_fun:D.error + "%s: failed to get management IP" __LOC__ | Some ip -> ip in @@ -2068,7 +2055,7 @@ let 
apply_edition_internal ~__context ~host ~edition ~additional = | V6_interface.(V6_error (License_checkout_error s)) -> raise Api_errors.(Server_error (license_checkout_error, [s])) | V6_interface.(V6_error (Internal_error e)) -> - raise Api_errors.(Server_error (internal_error, [e])) + Helpers.internal_error "%s" e in let create_feature fname fenabled = Db.Feature.create ~__context @@ -2139,9 +2126,7 @@ let license_add ~__context ~host ~contents = Pervasiveext.finally (fun () -> ( try Unixext.write_string_to_file tmp license - with _ -> - let s = "Failed to write temporary file." in - raise Api_errors.(Server_error (internal_error, [s])) + with _ -> Helpers.internal_error "Failed to write temporary file." ) ; apply_edition_internal ~__context ~host ~edition:"" ~additional:[("license_file", tmp)] @@ -2313,16 +2298,8 @@ let sync_vlans ~__context ~host = in Db.PIF.get_network ~__context ~self:pif_underneath_vlan | _ -> - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf "Cannot find vlan from a vlan master PIF:%s" - vlan_pif_rec.API.pIF_uuid - ] - ) - ) + Helpers.internal_error "Cannot find vlan from a vlan master PIF:%s" + vlan_pif_rec.API.pIF_uuid in let maybe_create_vlan (_, master_pif_rec) = (* Check to see if the slave has any existing pif(s) that for the specified device, network, vlan... *) @@ -2409,16 +2386,8 @@ let sync_tunnels ~__context ~host = let protocol = Db.Tunnel.get_protocol ~__context ~self:tunnel in (Db.PIF.get_network ~__context ~self:transport_pif, protocol) | _ -> - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf "PIF %s has no tunnel_access_PIF_of" - access_pif_rec.API.pIF_uuid - ] - ) - ) + Helpers.internal_error "PIF %s has no tunnel_access_PIF_of" + access_pif_rec.API.pIF_uuid in let maybe_create_tunnel_for_me (_, master_pif_rec) = (* check to see if I have any existing pif(s) that for the specified device, network, vlan... 
*) @@ -2881,8 +2850,7 @@ let set_sched_gran ~__context ~self ~value = () with e -> error "Failed to update sched-gran: %s" (Printexc.to_string e) ; - raise - Api_errors.(Server_error (internal_error, ["Failed to update sched-gran"])) + Helpers.internal_error "Failed to update sched-gran" let get_sched_gran ~__context ~self = if Helpers.get_localhost ~__context <> self then @@ -2900,8 +2868,7 @@ let get_sched_gran ~__context ~self = Record_util.host_sched_gran_of_string value with e -> error "Failed to get sched-gran: %s" (Printexc.to_string e) ; - raise - Api_errors.(Server_error (internal_error, ["Failed to get sched-gran"])) + Helpers.internal_error "Failed to get sched-gran" let emergency_disable_tls_verification ~__context = (* NB: the tls-verification state on this host will no longer agree with state.db *) @@ -2915,16 +2882,9 @@ let emergency_disable_tls_verification ~__context = with e -> info "Failed to update database after TLS verication was disabled: %s" (Printexc.to_string e) ; - raise - Api_errors.( - Server_error - ( internal_error - , [ - "TLS verification disabled successfully. Failed to contact the \ - coordinator to update the database." - ] - ) - ) + Helpers.internal_error + "TLS verification disabled successfully. Failed to contact the \ + coordinator to update the database." let emergency_reenable_tls_verification ~__context = (* NB: Should only be used after running emergency_disable_tls_verification. 
diff --git a/ocaml/xapi/xapi_host_driver_tool.ml b/ocaml/xapi/xapi_host_driver_tool.ml index f55d03ad3c0..dd719dcd7ec 100644 --- a/ocaml/xapi/xapi_host_driver_tool.ml +++ b/ocaml/xapi/xapi_host_driver_tool.ml @@ -18,14 +18,6 @@ open Debug.Make (struct let name = __MODULE__ end) -let internal_error fmt = - Printf.ksprintf - (fun str -> - error "%s" str ; - raise Api_errors.(Server_error (internal_error, [str])) - ) - fmt - (** types to represent the JSON output of the script that reports drivers *) type variant = { @@ -219,7 +211,8 @@ let parse str = | R.Ok x -> x | R.Error msg -> - internal_error "%s parsing failed: %s" __FUNCTION__ msg + Helpers.internal_error ~log_err:true "%s parsing failed: %s" __FUNCTION__ + msg | exception e -> raise e @@ -228,7 +221,8 @@ let read path = | R.Ok x -> x | R.Error msg -> - internal_error "%s parsing %s failed: %s" __FUNCTION__ path msg + Helpers.internal_error ~log_err:true "%s parsing %s failed: %s" + __FUNCTION__ path msg | exception e -> raise e @@ -239,8 +233,8 @@ let call args = debug "%s: executed %s %s" __FUNCTION__ path (String.concat " " args) ; stdout with e -> - internal_error "%s: failed to run %s %s: %s" __FUNCTION__ path - (String.concat " " args) (Printexc.to_string e) + Helpers.internal_error ~log_err:true "%s: failed to run %s %s: %s" + __FUNCTION__ path (String.concat " " args) (Printexc.to_string e) module Mock = struct let drivertool_sh = @@ -651,6 +645,6 @@ esac Xapi_stdext_unix.Unixext.write_string_to_file path drivertool_sh ; Unix.chmod path 0o755 with e -> - internal_error "%s: can't install %s: %s" __FUNCTION__ path - (Printexc.to_string e) + Helpers.internal_error ~log_err:true "%s: can't install %s: %s" + __FUNCTION__ path (Printexc.to_string e) end diff --git a/ocaml/xapi/xapi_host_helpers.ml b/ocaml/xapi/xapi_host_helpers.ml index eb707de3823..834b34beb4b 100644 --- a/ocaml/xapi/xapi_host_helpers.ml +++ b/ocaml/xapi/xapi_host_helpers.ml @@ -153,17 +153,9 @@ let valid_operations ~__context record 
_ref' = let throw_error table op = match Hashtbl.find_opt table op with | None -> - raise - (Api_errors.Server_error - ( Api_errors.internal_error - , [ - Printf.sprintf - "xapi_host_helpers.assert_operation_valid unknown operation: \ - %s" - (host_operation_to_string op) - ] - ) - ) + Helpers.internal_error + "xapi_host_helpers.assert_operation_valid unknown operation: %s" + (host_operation_to_string op) | Some (Some (code, params)) -> raise (Api_errors.Server_error (code, params)) | Some None -> diff --git a/ocaml/xapi/xapi_network_sriov_helpers.ml b/ocaml/xapi/xapi_network_sriov_helpers.ml index 6600f6a2f44..6be9c149b8e 100644 --- a/ocaml/xapi/xapi_network_sriov_helpers.ml +++ b/ocaml/xapi/xapi_network_sriov_helpers.ml @@ -25,16 +25,8 @@ let get_sriov_of ~__context ~sriov_logical_pif = | v :: _ -> v | [] -> - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf "Cannot find sriov object in sriov logical PIF %s" - (Ref.string_of sriov_logical_pif) - ] - ) - ) + Helpers.internal_error "Cannot find sriov object in sriov logical PIF %s" + (Ref.string_of sriov_logical_pif) let sriov_bring_up ~__context ~self = let update_sriov_with_result result = @@ -121,18 +113,9 @@ let require_operation_on_pci_device ~__context ~sriov ~self = | v :: _ -> v | [] -> - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf - "Cannot find sriov object in sriov logical \ - PIF %s" - pif_rec.API.pIF_uuid - ] - ) - ) + Helpers.internal_error + "Cannot find sriov object in sriov logical PIF %s" + pif_rec.API.pIF_uuid in let physical_pif = Db.Network_sriov.get_physical_PIF ~__context ~self:sriov @@ -235,21 +218,14 @@ let get_remaining_capacity_on_host ~__context ~host ~network = in match local_pifs with | [] -> - raise - Api_errors.( - Server_error (internal_error, ["Cannot get local pif on network"]) - ) + Helpers.internal_error "Cannot get local pif on network" | local_pif :: _ -> ( match get_underlying_pif ~__context ~pif:local_pif with | 
Some underlying_pif -> let pci = Db.PIF.get_PCI ~__context ~self:underlying_pif in Xapi_pci.get_idle_vf_nums ~__context ~self:pci | None -> - raise - Api_errors.( - Server_error - (internal_error, ["Cannot get underlying pif on sriov network"]) - ) + Helpers.internal_error "Cannot get underlying pif on sriov network" ) (* Partition hosts by attached and unattached pifs, the network input is a SR-IOV type. @@ -276,16 +252,8 @@ let group_hosts_by_best_sriov ~__context ~network = | VLAN_untagged _ :: Network_sriov_logical sriov :: _ -> sriov | _ -> - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf "Cannot find sriov object in PIF %s" - (Ref.string_of pif) - ] - ) - ) + Helpers.internal_error "Cannot find sriov object in PIF %s" + (Ref.string_of pif) in if can_be_up_without_reboot ~__context sriov then (l1, (host, 0L) :: l2) @@ -320,11 +288,7 @@ let reserve_sriov_vfs ~__context ~host ~vm = | Some vf -> Db.VIF.set_reserved_pci ~__context ~self:vif ~value:vf | None -> - raise - Api_errors.( - Server_error - (internal_error, ["No free virtual function found"]) - ) + Helpers.internal_error "No free virtual function found" ) ) vifs diff --git a/ocaml/xapi/xapi_pci.ml b/ocaml/xapi/xapi_pci.ml index 7c805c7e9cf..34bb80af541 100644 --- a/ocaml/xapi/xapi_pci.ml +++ b/ocaml/xapi/xapi_pci.ml @@ -274,11 +274,9 @@ let update_pcis ~__context = r ) with Not_found -> - let msg = - Printf.sprintf "failed to update PCI dependencies for %s (%s)" - (Ref.string_of pref) __LOC__ - in - raise Api_errors.(Server_error (internal_error, [msg])) + Helpers.internal_error + "failed to update PCI dependencies for %s (%s)" + (Ref.string_of pref) __LOC__ in Db.PCI.set_dependencies ~__context ~self:pref ~value:dependencies ; update remaining diff --git a/ocaml/xapi/xapi_pgpu.ml b/ocaml/xapi/xapi_pgpu.ml index a8ce14d7347..50f2f77f7be 100644 --- a/ocaml/xapi/xapi_pgpu.ml +++ b/ocaml/xapi/xapi_pgpu.ml @@ -391,7 +391,6 @@ let nvidia_vf_setup_mutex = Mutex.create () let 
nvidia_vf_setup ~__context ~pf ~enable = let sprintf = Printf.sprintf in - let fail msg = Api_errors.(Server_error (internal_error, [msg])) in let script = !Xapi_globs.nvidia_sriov_manage_script in let enable' = if enable then "-e" else "-d" in let bind_path = "/sys/bus/pci/drivers/nvidia/bind" in @@ -409,12 +408,9 @@ let nvidia_vf_setup ~__context ~pf ~enable = debug "File %s does not exist - assuming no SRIOV devices in use" path ; None | exn -> - let msg = - Printf.sprintf "Can't read %s to activate Nvidia GPU %s: %s" path pci - (Printexc.to_string exn) - in - error "%s" msg ; - raise (fail msg) + Helpers.internal_error ~log_err:true + "Can't read %s to activate Nvidia GPU %s: %s" path pci + (Printexc.to_string exn) in let write_to path pci = try @@ -423,7 +419,7 @@ let nvidia_vf_setup ~__context ~pf ~enable = with e -> error "failed to write to %s to re-bind PCI %s to Nvidia driver: %s" path pci (Printexc.to_string e) ; - raise (fail (sprintf "Can't rebind PCI %s driver" pci)) + Helpers.internal_error "Can't rebind PCI %s driver" pci in let bind_to_nvidia pci = match Xapi_pci_helpers.get_driver_name pci with @@ -458,7 +454,7 @@ let nvidia_vf_setup ~__context ~pf ~enable = debug "PCI %s already has %n VFs - not calling %s" pci n script | _ -> error "nvdia_vf_setup %s does not exist" script ; - raise (fail (sprintf "Can't locate %s" script)) + Helpers.internal_error "Can't locate %s" script in (* Update the gpus even if the VFs were present already, in case they were * already created before xapi was (re)started. 
*) diff --git a/ocaml/xapi/xapi_pif.ml b/ocaml/xapi/xapi_pif.ml index 1e82189f293..19946dca783 100644 --- a/ocaml/xapi/xapi_pif.ml +++ b/ocaml/xapi/xapi_pif.ml @@ -90,32 +90,15 @@ let refresh_internal ~__context ~self = let refresh ~__context ~host ~self = let localhost = Helpers.get_localhost ~__context in if not (host = localhost) then - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf "refresh: Host mismatch, expected %s but got %s" - (Ref.string_of host) (Ref.string_of localhost) - ] - ) - ) ; + Helpers.internal_error "refresh: Host mismatch, expected %s but got %s" + (Ref.string_of host) (Ref.string_of localhost) ; refresh_internal ~__context ~self let refresh_all ~__context ~host = let localhost = Helpers.get_localhost ~__context in if not (host = localhost) then - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf - "refresh_all: Host mismatch, expected %s but got %s" - (Ref.string_of host) (Ref.string_of localhost) - ] - ) - ) ; + Helpers.internal_error "refresh_all: Host mismatch, expected %s but got %s" + (Ref.string_of host) (Ref.string_of localhost) ; (* Only refresh physical or attached PIFs *) let pifs = Db.PIF.get_refs_where ~__context diff --git a/ocaml/xapi/xapi_pif_helpers.ml b/ocaml/xapi/xapi_pif_helpers.ml index ae29c3c366e..fac7593b7d2 100644 --- a/ocaml/xapi/xapi_pif_helpers.ml +++ b/ocaml/xapi/xapi_pif_helpers.ml @@ -84,17 +84,8 @@ let get_pif_type pif_rec = | Some v, _ -> v | None, _ -> - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf "Cannot calculate PIF type of %s" - pif_rec.API.pIF_uuid - ] - ) - ) - + Helpers.internal_error "Cannot calculate PIF type of %s" + pif_rec.API.pIF_uuid (* The root PIF underneath should be Physical or Bond_master *) (** This function aims to get a list of types of the PIFs underneath the given PIF *) diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 0bf5c9e8b4d..05d50e97784 100644 --- 
a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -50,13 +50,13 @@ let rpc ~__context ~verify_cert host_address xml = let get_pool ~rpc ~session_id = match Client.Pool.get_all ~rpc ~session_id with | [] -> - let err_msg = "Remote host does not belong to a pool." in - raise Api_errors.(Server_error (internal_error, [err_msg])) + Helpers.internal_error "Remote host does not belong to a pool." | [pool] -> pool - | _ -> - let err_msg = "Should get only one pool." in - raise Api_errors.(Server_error (internal_error, [err_msg])) + | pools -> + Helpers.internal_error "Should get only one pool, but got %d: %s" + (List.length pools) + (pools |> List.map Ref.string_of |> String.concat ",") let get_master ~rpc ~session_id = let pool = get_pool ~rpc ~session_id in @@ -1034,8 +1034,7 @@ and create_or_get_sr_on_master __context rpc session_id (_sr_ref, sr) : |> List.find (fun (_, sr) -> sr.API.sR_is_tools_sr) |> fun (ref, _) -> ref with Not_found -> - let msg = Printf.sprintf "can't find SR %s of tools iso" my_uuid in - raise Api_errors.(Server_error (internal_error, [msg])) + Helpers.internal_error "can't find SR %s of tools iso" my_uuid else ( debug "Found no SR with uuid = '%s' on the master, so creating one." my_uuid ; @@ -3798,8 +3797,8 @@ let set_telemetry_next_collection ~__context ~self ~value = | Some dt1, dt2 -> (dt1, dt2) | _ | (exception _) -> - let err_msg = "Can't parse date and time for telemetry collection." in - raise Api_errors.(Server_error (internal_error, [err_msg])) + Helpers.internal_error + "Can't parse date and time for telemetry collection." 
in let ts = Date.to_rfc3339 value in match Ptime.is_later dt_of_value ~than:dt_of_max_sched with diff --git a/ocaml/xapi/xapi_pool_helpers.ml b/ocaml/xapi/xapi_pool_helpers.ml index ec281ade966..2a3aedaf4f0 100644 --- a/ocaml/xapi/xapi_pool_helpers.ml +++ b/ocaml/xapi/xapi_pool_helpers.ml @@ -131,17 +131,9 @@ let valid_operations ~__context record (pool : API.ref_pool) = let throw_error table op = match Hashtbl.find_opt table op with | None -> - raise - (Api_errors.Server_error - ( Api_errors.internal_error - , [ - Printf.sprintf - "xapi_pool_helpers.assert_operation_valid unknown operation: \ - %s" - (pool_allowed_operations_to_string op) - ] - ) - ) + Helpers.internal_error + "xapi_pool_helpers.assert_operation_valid unknown operation: %s" + (pool_allowed_operations_to_string op) | Some (Some (code, params)) -> raise (Api_errors.Server_error (code, params)) | Some None -> @@ -206,7 +198,7 @@ let assert_no_pool_ops ~__context = |> String.concat "; " |> Printf.sprintf "pool operations in progress: [ %s ]" in - raise Api_errors.(Server_error (internal_error, [err])) + Helpers.internal_error "%s" err let get_master_slaves_list_with_fn ~__context fn = let _unsorted_hosts = Db.Host.get_all ~__context in diff --git a/ocaml/xapi/xapi_pool_patch.ml b/ocaml/xapi/xapi_pool_patch.ml index 72033070bc5..4cf5319804f 100644 --- a/ocaml/xapi/xapi_pool_patch.ml +++ b/ocaml/xapi/xapi_pool_patch.ml @@ -46,7 +46,7 @@ let pool_patch_of_update ~__context update_ref = "Invalid state: Expected invariant - 1 pool_patch per pool_update. \ Found: [%s]" (String.concat ";" (List.map (fun patch -> Ref.string_of patch) patches)) ; - raise Api_errors.(Server_error (internal_error, ["Invalid state"])) + Helpers.internal_error "Invalid state" let pool_patch_upload_handler (req : Http.Request.t) s _ = debug "Patch Upload Handler - Entered..." 
; diff --git a/ocaml/xapi/xapi_pool_update.ml b/ocaml/xapi/xapi_pool_update.ml index 2307eca8589..426db45cbcd 100644 --- a/ocaml/xapi/xapi_pool_update.ml +++ b/ocaml/xapi/xapi_pool_update.ml @@ -119,7 +119,7 @@ let assert_update_vbds_attached ~__context ~vdi = "pool_update: expected VBDs=[ %s ] to be attached but they aren't!" (unplugged |> List.map Ref.string_of |> String.concat "; ") in - raise Api_errors.(Server_error (internal_error, [msg])) + Helpers.internal_error "%s" msg let with_dec_refcount ~__context ~uuid ~vdi f = with_lock updates_to_attach_count_tbl_mutex (fun () -> diff --git a/ocaml/xapi/xapi_psr.ml b/ocaml/xapi/xapi_psr.ml index aa2481b3eca..8aca87ddd28 100644 --- a/ocaml/xapi/xapi_psr.ml +++ b/ocaml/xapi/xapi_psr.ml @@ -183,10 +183,7 @@ functor (* _in theory_ save_checkpoint or backup could fail, so catch that here. however we don't expect this to happen *) D.error "PSR.start: unexpected error: %s" (Printexc.to_string e) ; - raise - Api_errors.( - Server_error (internal_error, ["PSR.start: unexpected error"]) - ) + Helpers.internal_error "PSR.start: unexpected error" end let perm = 0o640 @@ -220,11 +217,7 @@ end = struct | false, false -> false | false, true | true, false -> - raise - Api_errors.( - Server_error - (internal_error, ["do_backups_exist: invalid backup state"]) - ) + Helpers.internal_error "do_backups_exist: invalid backup state" let does_checkpoint_exist () = Sys.file_exists checkpoint_path @@ -235,34 +228,23 @@ end = struct SecretString.(equal old_backup old_ps && equal new_backup new_ps) in if not do_backups_match then - raise Api_errors.(Server_error (internal_error, ["backups don't match"])) + Helpers.internal_error "backups don't match" let no_backups () = if do_backups_exist () then - raise - Api_errors.( - Server_error (internal_error, ["pool member should have no backups"]) - ) + Helpers.internal_error "pool member should have no backups" let no_checkpoint () = (* we expect a checkpoint on the master, but not slaves *) if 
Pool_role.is_slave () && does_checkpoint_exist () then - raise - Api_errors.( - Server_error - (internal_error, ["pool member should not have a checkpoint"]) - ) + Helpers.internal_error "pool member should not have a checkpoint" let master_state_valid () = match (do_backups_exist (), does_checkpoint_exist ()) with | false, false | true, true -> () | false, true | true, false -> - raise - Api_errors.( - Server_error - (internal_error, ["master pool secret rotation state is invalid"]) - ) + Helpers.internal_error "master pool secret rotation state is invalid" end let cleanup_internal ~additional_files_to_remove ~old_ps ~new_ps = @@ -271,16 +253,9 @@ let cleanup_internal ~additional_files_to_remove ~old_ps ~new_ps = Assert.no_backups () ; D.info "xapi_psr.ml:cleanup_internal: already cleaned up" | [_] -> - raise - Api_errors.( - Server_error - ( internal_error - , [ - "cleanup_internal: host has been cleaned up, but pool secret \ - doesn't match" - ] - ) - ) + Helpers.internal_error + "cleanup_internal: host has been cleaned up, but pool secret doesn't \ + match" | [priority_1_ps; priority_2_ps] when SecretString.(equal new_ps priority_1_ps && equal old_ps priority_2_ps) -> @@ -301,23 +276,10 @@ let cleanup_internal ~additional_files_to_remove ~old_ps ~new_ps = (* psr done, so stop accepting old pool secret *) Xapi_globs.pool_secrets := [priority_1_ps] | [_; _] -> - raise - Api_errors.( - Server_error - (internal_error, ["cleanup_internal: runtime secrets don't match"]) - ) + Helpers.internal_error "cleanup_internal: runtime secrets don't match" | l -> - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf - "cleanup_internal: expected 1 or 2 pool secrets, got: %i" - (List.length l) - ] - ) - ) + Helpers.internal_error + "cleanup_internal: expected 1 or 2 pool secrets, got: %i" (List.length l) module Impl = functor @@ -348,11 +310,7 @@ functor Some x | [] | _ :: _ -> D.error "unexpected checkpoint file format: %s" checkpoint_path ; - raise - 
Api_errors.( - Server_error - (internal_error, ["unexpected checkpoint file format"]) - ) + Helpers.internal_error "unexpected checkpoint file format" | exception e -> D.info "tried to read %s, but encountered exception %s. assuming it \ @@ -402,36 +360,18 @@ let notify_new ~__context ~old_ps ~new_ps = then D.info "xapi_psr.ml:notify_new: already accepting new pool secret" else - raise - Api_errors.( - Server_error - ( internal_error - , ["notify_new: existing pool secrets are inconsistent"] - ) - ) + Helpers.internal_error + "notify_new: existing pool secrets are inconsistent" | [priority_1_ps] when SecretString.equal priority_1_ps old_ps -> if Pool_role.is_slave () then Assert.no_backups () ; SecretString.write_to_file old_pool_secret_backup_path old_ps ; SecretString.write_to_file new_pool_secret_backup_path new_ps ; Xapi_globs.pool_secrets := [old_ps; new_ps] | [_] -> - raise - Api_errors.( - Server_error - (internal_error, ["notify_new: old pool secret doesn't match"]) - ) + Helpers.internal_error "notify_new: old pool secret doesn't match" | l -> - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf - "notify_new: expected 1 or 2 pool secrets, got: %i" - (List.length l) - ] - ) - ) + Helpers.internal_error "notify_new: expected 1 or 2 pool secrets, got: %i" + (List.length l) let notify_send ~__context ~old_ps ~new_ps = Xapi_fist.hang_psr `notify_send ; @@ -470,22 +410,10 @@ let notify_send ~__context ~old_ps ~new_ps = -> D.info "xapi_psr.ml:notify_send: already sending new_ps" | [_; _] -> - raise - Api_errors.( - Server_error - (internal_error, ["notify_send: runtime secrets don't match"]) - ) + Helpers.internal_error "notify_send: runtime secrets don't match" | l -> - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf "notify_send: expected 2 pool secrets, got: %i" - (List.length l) - ] - ) - ) + Helpers.internal_error "notify_send: expected 2 pool secrets, got: %i" + (List.length l) let cleanup ~__context 
~old_ps ~new_ps = Xapi_fist.hang_psr `cleanup ; @@ -505,10 +433,7 @@ let start = if Mutex.try_lock m then ( try f () ; Mutex.unlock m with e -> Mutex.unlock m ; raise e ) else - raise - Api_errors.( - Server_error (internal_error, ["pool secret rotation already running"]) - ) + Helpers.internal_error "pool secret rotation already running" in fun ~__context -> let self = Helpers.get_pool ~__context in @@ -580,5 +505,5 @@ let start = | Error e -> let err_msg = user_facing_error_message ~__context e in D.error "PSR failed: %s" err_msg ; - raise Api_errors.(Server_error (internal_error, [err_msg])) + Helpers.internal_error "%s" err_msg ) diff --git a/ocaml/xapi/xapi_pusb.ml b/ocaml/xapi/xapi_pusb.ml index e1bf3e82acb..2d3a9a5bceb 100644 --- a/ocaml/xapi/xapi_pusb.ml +++ b/ocaml/xapi/xapi_pusb.ml @@ -186,16 +186,8 @@ let set_passthrough_enabled ~__context ~self ~value = | [] -> () | _ :: _ :: _ -> - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf "too many vusb on the USB_group: %s" - (Ref.string_of usb_group) - ] - ) - ) + Helpers.internal_error "too many vusb on the USB_group: %s" + (Ref.string_of usb_group) | [vusb] -> let currently_attached = Db.VUSB.get_currently_attached ~__context ~self:vusb diff --git a/ocaml/xapi/xapi_pusb_helpers.ml b/ocaml/xapi/xapi_pusb_helpers.ml index 92560a7f18f..b82134f866f 100644 --- a/ocaml/xapi/xapi_pusb_helpers.ml +++ b/ocaml/xapi/xapi_pusb_helpers.ml @@ -98,13 +98,7 @@ let get_script_stdout () = let stdout, _ = Forkhelpers.execute_command_get_output usb_scan_script [] in stdout with Forkhelpers.Spawn_internal_error (_, _, Unix.WEXITED n) -> - raise - Api_errors.( - Server_error - ( internal_error - , [Printf.sprintf "%s exitted with %d" usb_scan_script n] - ) - ) + Helpers.internal_error "%s exitted with %d" usb_scan_script n let get_usbs stdout = let extract_devices json = [json] |> flatten in diff --git a/ocaml/xapi/xapi_session.ml b/ocaml/xapi/xapi_session.ml index 95d310a085e..cdbbf638e86 100644 --- 
a/ocaml/xapi/xapi_session.ml +++ b/ocaml/xapi/xapi_session.ml @@ -901,12 +901,8 @@ let login_with_password ~__context ~uname ~pwd ~version:_ ~originator = | role :: _ -> role | [] -> - raise - (Api_errors.Server_error - ( Api_errors.internal_error - , [Datamodel_roles.role_client_cert ^ " role not found"] - ) - ) + Helpers.internal_error "%s role not found" + Datamodel_roles.role_client_cert in let rbac_permissions = Xapi_role.get_permissions_name_label ~__context ~self:role diff --git a/ocaml/xapi/xapi_sr.ml b/ocaml/xapi/xapi_sr.ml index b6d8caf5dda..d09490b9521 100644 --- a/ocaml/xapi/xapi_sr.ml +++ b/ocaml/xapi/xapi_sr.ml @@ -806,10 +806,7 @@ let scan ~__context ~sr = limit ; (scan_rec [@tailcall]) (limit - 1) ) else if limit = 0 then - raise - (Api_errors.Server_error - (Api_errors.internal_error, ["SR.scan retry limit exceeded"]) - ) + Helpers.internal_error "SR.scan retry limit exceeded" else ( update_vdis ~__context ~sr db_vdis_after vs ; let virtual_allocation = @@ -932,16 +929,8 @@ let assert_supports_database_replication ~__context ~sr = with | [] -> (* This should never happen because the PBDs are plugged in *) - raise - (Api_errors.Server_error - ( Api_errors.internal_error - , [ - "SR does not have corresponding SM record" - ; Ref.string_of sr - ; srtype - ] - ) - ) + Helpers.internal_error "SR does not have corresponding SM record: %s %s" + (Ref.string_of sr) srtype | (_, sm) :: _ -> if not (List.mem_assoc "SR_METADATA" sm.Db_actions.sM_features) then raise diff --git a/ocaml/xapi/xapi_sr_operations.ml b/ocaml/xapi/xapi_sr_operations.ml index 263f002d474..75a3c695af4 100644 --- a/ocaml/xapi/xapi_sr_operations.ml +++ b/ocaml/xapi/xapi_sr_operations.ml @@ -250,16 +250,9 @@ let valid_operations ~__context ?op record _ref' : table = let throw_error (table : table) op = match Hashtbl.find_opt table op with | None -> - raise - (Api_errors.Server_error - ( Api_errors.internal_error - , [ - Printf.sprintf - "xapi_sr.assert_operation_valid unknown operation: 
%s" - (sr_operation_to_string op) - ] - ) - ) + Helpers.internal_error + "xapi_sr.assert_operation_valid unknown operation: %s" + (sr_operation_to_string op) | Some (Some (code, params)) -> raise (Api_errors.Server_error (code, params)) | Some None -> diff --git a/ocaml/xapi/xapi_templates.ml b/ocaml/xapi/xapi_templates.ml index 77bec3f843a..0c0ae87631d 100644 --- a/ocaml/xapi/xapi_templates.ml +++ b/ocaml/xapi/xapi_templates.ml @@ -56,13 +56,7 @@ let string2vdi_type s = | "crashdump" -> `crashdump | vdi_st -> - raise - Api_errors.( - Server_error - ( internal_error - , [Printf.sprintf "string2vdi_type: Unknown VDI type \"%s\"" vdi_st] - ) - ) + Helpers.internal_error "string2vdi_type: Unknown VDI type \"%s\"" vdi_st exception Parse_failure diff --git a/ocaml/xapi/xapi_vbd_helpers.ml b/ocaml/xapi/xapi_vbd_helpers.ml index a63fa6edf1f..91fedaebaeb 100644 --- a/ocaml/xapi/xapi_vbd_helpers.ml +++ b/ocaml/xapi/xapi_vbd_helpers.ml @@ -305,16 +305,9 @@ let valid_operations ~expensive_sharing_checks ~__context record _ref' : table = let throw_error (table : table) op = match Hashtbl.find_opt table op with | None -> - raise - (Api_errors.Server_error - ( Api_errors.internal_error - , [ - Printf.sprintf - "xapi_vbd_helpers.assert_operation_valid unknown operation: %s" - (vbd_operations_to_string op) - ] - ) - ) + Helpers.internal_error + "xapi_vbd_helpers.assert_operation_valid unknown operation: %s" + (vbd_operations_to_string op) | Some (Some (code, params)) -> raise (Api_errors.Server_error (code, params)) | Some None -> diff --git a/ocaml/xapi/xapi_vif_helpers.ml b/ocaml/xapi/xapi_vif_helpers.ml index da6ede482fa..554b1dcea07 100644 --- a/ocaml/xapi/xapi_vif_helpers.ml +++ b/ocaml/xapi/xapi_vif_helpers.ml @@ -157,16 +157,9 @@ let valid_operations ~__context record _ref' : table = let throw_error (table : table) op = match Hashtbl.find_opt table op with | None -> - raise - (Api_errors.Server_error - ( Api_errors.internal_error - , [ - Printf.sprintf - 
"xapi_vif_helpers.assert_operation_valid unknown operation: %s" - (vif_operations_to_string op) - ] - ) - ) + Helpers.internal_error + "xapi_vif_helpers.assert_operation_valid unknown operation: %s" + (vif_operations_to_string op) | Some (Some (code, params)) -> raise (Api_errors.Server_error (code, params)) | Some None -> diff --git a/ocaml/xapi/xapi_vm.ml b/ocaml/xapi/xapi_vm.ml index 9631c03a8f8..78967197a8f 100644 --- a/ocaml/xapi/xapi_vm.ml +++ b/ocaml/xapi/xapi_vm.ml @@ -157,10 +157,7 @@ let set_dom0_memory ~__context ~self:_ ~bytes = Xapi_host_helpers.Host_requires_reboot.set () with e -> error "Failed to update dom0 memory: %s" (Printexc.to_string e) ; - raise - Api_errors.( - Server_error (internal_error, ["Failed to update dom0 memory"]) - ) + Helpers.internal_error "Failed to update dom0 memory" let set_memory_static_range ~__context ~self ~min ~max = (* For non-control domains, this function is only called on the master and @@ -363,10 +360,8 @@ let start ~__context ~vm ~start_paused ~force = let assert_host_is_localhost ~__context ~host = let localhost = Helpers.get_localhost ~__context in if host <> localhost then - let msg = + Helpers.internal_error "Error in message forwarding layer: host parameter was not localhost" - in - raise (Api_errors.Server_error (Api_errors.internal_error, [msg])) let start_on ~__context ~vm ~host ~start_paused ~force = (* If we modify this to support start_on other-than-localhost, @@ -1392,13 +1387,8 @@ let set_suspend_VDI ~__context ~self ~value = | `Fail e -> raise e | `Pending -> - raise - Api_errors.( - Server_error - ( internal_error - , ["set_suspend_VDI: The operation is still `Pending"] - ) - ) + Helpers.internal_error + "set_suspend_VDI: The operation is still `Pending" in let src_checksum = get_result src_thread src_result in let dst_checksum = get_result dst_thread dst_result in diff --git a/ocaml/xapi/xapi_vm_group_helpers.ml b/ocaml/xapi/xapi_vm_group_helpers.ml index 87fc15b10b5..35268da9672 100644 --- 
a/ocaml/xapi/xapi_vm_group_helpers.ml +++ b/ocaml/xapi/xapi_vm_group_helpers.ml @@ -87,13 +87,10 @@ let alert_matched ~__context ~label_name ~id alert = match filtered with [uuid] when uuid = id -> true | _ -> false ) | _ -> - let msg = "Invalid message body of VM group alert" in - error "%s" msg ; - raise Api_errors.(Server_error (internal_error, [msg])) + Helpers.internal_error ~log_err:true + "Invalid message body of VM group alert" | exception e -> - let msg = Printf.sprintf "%s" (ExnHelper.string_of_exn e) in - error "%s" msg ; - raise Api_errors.(Server_error (internal_error, [msg])) + Helpers.internal_error ~log_err:true "%s" (ExnHelper.string_of_exn e) let filter_alerts_with_group ~__context ~group ~alerts = let group_uuid = Db.VM_group.get_uuid ~__context ~self:group in diff --git a/ocaml/xapi/xapi_vm_lifecycle.ml b/ocaml/xapi/xapi_vm_lifecycle.ml index 914cfd15e8a..9ab13f79b54 100644 --- a/ocaml/xapi/xapi_vm_lifecycle.ml +++ b/ocaml/xapi/xapi_vm_lifecycle.ml @@ -988,18 +988,13 @@ let assert_initial_power_state_is ~expected = let assert_final_power_state_in ~__context ~self ~allowed = let actual = Db.VM.get_power_state ~__context ~self in if not (List.mem actual allowed) then - raise - (Api_errors.Server_error - ( Api_errors.internal_error - , [ - "VM not in expected power state after completing operation" - ; Ref.string_of self - ; List.map Record_util.vm_power_state_to_lowercase_string allowed - |> String.concat ";" - ; Record_util.vm_power_state_to_lowercase_string actual - ] - ) + Helpers.internal_error + "VM not in expected power state after completing operation: %s, %s, %s" + (Ref.string_of self) + (List.map Record_util.vm_power_state_to_lowercase_string allowed + |> String.concat ";" ) + (Record_util.vm_power_state_to_lowercase_string actual) (** Assert that VM is in a certain state after completing an operation *) let assert_final_power_state_is ~expected = diff --git a/ocaml/xapi/xapi_vm_migrate.ml b/ocaml/xapi/xapi_vm_migrate.ml index 
fb0c3aba577..9a4399e9148 100644 --- a/ocaml/xapi/xapi_vm_migrate.ml +++ b/ocaml/xapi/xapi_vm_migrate.ml @@ -1904,16 +1904,9 @@ let assert_can_migrate ~__context ~vm ~dest ~live:_ ~vdi_map ~vif_map ~options = [] ) then - raise - Api_errors.( - Server_error - ( internal_error - , [ - "assert_can_migrate: inter_pool_metadata_transfer returned \ - a nonempty list" - ] - ) - ) + Helpers.internal_error + "assert_can_migrate: inter_pool_metadata_transfer returned a \ + nonempty list" with Xmlrpc_client.Connection_reset -> raise (Api_errors.Server_error diff --git a/ocaml/xapi/xapi_vm_snapshot.ml b/ocaml/xapi/xapi_vm_snapshot.ml index 4ef856d1630..a7fc76a8417 100644 --- a/ocaml/xapi/xapi_vm_snapshot.ml +++ b/ocaml/xapi/xapi_vm_snapshot.ml @@ -161,10 +161,9 @@ let checkpoint ~__context ~vm ~new_name = (* The following code have to run on the master as it manipulates the DB cache directly. *) let copy_vm_fields ~__context ~metadata ~dst ~do_not_copy ~overrides = - ( if not (Pool_role.is_master ()) then - let msg = "copy_vm_fields: Aborting because the host is not master" in - raise Api_errors.(Server_error (internal_error, [msg])) - ) ; + if not (Pool_role.is_master ()) then + Helpers.internal_error + "copy_vm_fields: Aborting because the host is not master" ; debug "copying metadata into %s" (Ref.string_of dst) ; let db = Context.database_of __context in let module DB = diff --git a/ocaml/xapi/xapi_vtpm.ml b/ocaml/xapi/xapi_vtpm.ml index a9acf9626fb..6e9d96e97f9 100644 --- a/ocaml/xapi/xapi_vtpm.ml +++ b/ocaml/xapi/xapi_vtpm.ml @@ -100,8 +100,7 @@ let set_contents ~__context ~self ~contents = let _ = (* verify contents to be already base64-encoded *) try Base64.decode contents - with Invalid_argument err -> - raise Api_errors.(Server_error (internal_error, [err])) + with Invalid_argument err -> Helpers.internal_error "%s" err in let secret = Xapi_secret.create ~__context ~value:contents ~other_config:[] in Db.VTPM.set_contents ~__context ~self ~value:secret ; diff --git 
a/ocaml/xapi/xapi_vusb_helpers.ml b/ocaml/xapi/xapi_vusb_helpers.ml index 2b9e0805865..9b1870cf141 100644 --- a/ocaml/xapi/xapi_vusb_helpers.ml +++ b/ocaml/xapi/xapi_vusb_helpers.ml @@ -94,17 +94,9 @@ let valid_operations ~__context record _ref' : table = let throw_error (table : table) op = match Hashtbl.find_opt table op with | None -> - raise - (Api_errors.Server_error - ( Api_errors.internal_error - , [ - Printf.sprintf - "xapi_vusb_helpers.assert_operation_valid unknown operation: \ - %s" - (vusb_operations_to_string op) - ] - ) - ) + Helpers.internal_error + "xapi_vusb_helpers.assert_operation_valid unknown operation: %s" + (vusb_operations_to_string op) | Some (Some (code, params)) -> raise (Api_errors.Server_error (code, params)) | Some None -> diff --git a/ocaml/xapi/xapi_xenops.ml b/ocaml/xapi/xapi_xenops.ml index 852852cdc2d..3e9d8f89274 100644 --- a/ocaml/xapi/xapi_xenops.ml +++ b/ocaml/xapi/xapi_xenops.ml @@ -180,16 +180,8 @@ let backend_of_vif ~__context ~vif = in Network.Sriov {domain; bus; dev; fn} else - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf "No reserved_pci for network SR-IOV vif %s" - (Ref.string_of vif) - ] - ) - ) + Helpers.internal_error "No reserved_pci for network SR-IOV vif %s" + (Ref.string_of vif) else backend_of_network net @@ -530,8 +522,7 @@ let builder_of_vm ~__context (vmref, vm) timeoffset pci_passthrough vgpu = | `pvh, Helpers.Indirect options -> PVH (make_indirect_boot_record options) | _ -> - let msg = "invalid boot configuration" in - raise Api_errors.(Server_error (internal_error, [msg])) + Helpers.internal_error "invalid boot configuration" let list_net_sriov_vf_pcis ~__context ~vm = vm.API.vM_VIFs @@ -3141,16 +3132,8 @@ let on_xapi_restart ~__context = let assert_resident_on ~__context ~self = let localhost = Helpers.get_localhost ~__context in if not (Db.VM.get_resident_on ~__context ~self = localhost) then - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf 
"the VM %s is not resident on this host" - (Ref.string_of self) - ] - ) - ) + Helpers.internal_error "the VM %s is not resident on this host" + (Ref.string_of self) module Events_from_xapi = struct let greatest_token = ref "" @@ -3796,31 +3779,15 @@ let shutdown ~__context ~self timeout = ~expected:`Halted ; (* force_state_reset called from the xenopsd event loop above *) if not (Db.VM.get_resident_on ~__context ~self = Ref.null) then - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf - "shutdown: The VM %s is still resident on the host" - (Ref.string_of self) - ] - ) - ) ; + Helpers.internal_error + "shutdown: The VM %s is still resident on the host" + (Ref.string_of self) ; List.iter (fun vbd -> if Db.VBD.get_currently_attached ~__context ~self:vbd then - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf - "shutdown: The VBD %s is still attached to VM %s" - (Ref.string_of vbd) (Ref.string_of self) - ] - ) - ) + Helpers.internal_error + "shutdown: The VBD %s is still attached to VM %s" + (Ref.string_of vbd) (Ref.string_of self) ) (Db.VM.get_VBDs ~__context ~self) ) @@ -3881,17 +3848,9 @@ let suspend ~__context ~self = ) && not (Db.VM.get_resident_on ~__context ~self = Ref.null) then - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf - "suspend: The VM %s is still resident on the host" - (Ref.string_of self) - ] - ) - ) + Helpers.internal_error + "suspend: The VM %s is still resident on the host" + (Ref.string_of self) with e -> error "Caught exception suspending VM: %s" (string_of_exn e) ; (* If the domain has suspended, we have to shut it down *) @@ -4012,16 +3971,8 @@ let vbd_plug ~__context ~self = Client.VBD.plug dbg id |> sync_with_task __context queue_name ) ; if not (Db.VBD.get_currently_attached ~__context ~self) then - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf "vbd_plug: Unable to plug VBD %s" - (Ref.string_of self) - ] - ) - ) + 
Helpers.internal_error "vbd_plug: Unable to plug VBD %s" + (Ref.string_of self) ) let vbd_unplug ~__context ~self force = @@ -4049,16 +4000,8 @@ let vbd_unplug ~__context ~self force = ) ; Events_from_xenopsd.wait queue_name dbg (fst vbd.Vbd.id) () ; if Db.VBD.get_currently_attached ~__context ~self then - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf "vbd_unplug: Unable to unplug VBD %s" - (Ref.string_of self) - ] - ) - ) + Helpers.internal_error "vbd_unplug: Unable to unplug VBD %s" + (Ref.string_of self) ) let vbd_eject_hvm ~__context ~self = @@ -4074,30 +4017,13 @@ let vbd_eject_hvm ~__context ~self = Events_from_xenopsd.wait queue_name dbg (fst vbd.Vbd.id) () ; Events_from_xapi.wait ~__context ~self:vm ; if not (Db.VBD.get_empty ~__context ~self) then - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf - "vbd_eject_hvm: The VBD %s has not been emptied" - (Ref.string_of self) - ] - ) - ) ; + Helpers.internal_error "vbd_eject_hvm: The VBD %s has not been emptied" + (Ref.string_of self) ; let vdi = Db.VBD.get_VDI ~__context ~self in if not (vdi = Ref.null) then - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf - "vbd_eject_hvm: The VBD %s is still connected to VDI %s" - (Ref.string_of self) (Ref.string_of vdi) - ] - ) - ) + Helpers.internal_error + "vbd_eject_hvm: The VBD %s is still connected to VDI %s" + (Ref.string_of self) (Ref.string_of vdi) ) let vbd_insert_hvm ~__context ~self ~vdi = @@ -4115,30 +4041,14 @@ let vbd_insert_hvm ~__context ~self ~vdi = Events_from_xenopsd.wait queue_name dbg (fst vbd.Vbd.id) () ; Events_from_xapi.wait ~__context ~self:vm ; if Db.VBD.get_empty ~__context ~self then - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf "vbd_insert_hvm: The VBD %s is empty" - (Ref.string_of self) - ] - ) - ) ; + Helpers.internal_error "vbd_insert_hvm: The VBD %s is empty" + (Ref.string_of self) ; let vdi' = Db.VBD.get_VDI 
~__context ~self in if not (vdi' = vdi) then - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf - "vbd_insert_hvm: The VBD %s has been connected to the \ - wrong VDI (expected %s, got %s)" - (Ref.string_of self) (Ref.string_of vdi) (Ref.string_of vdi) - ] - ) - ) + Helpers.internal_error + "vbd_insert_hvm: The VBD %s has been connected to the wrong VDI \ + (expected %s, got %s)" + (Ref.string_of self) (Ref.string_of vdi) (Ref.string_of vdi) ) let has_qemu ~__context ~vm = @@ -4201,16 +4111,8 @@ let vif_plug ~__context ~self = ) ) ; if not (Db.VIF.get_currently_attached ~__context ~self) then - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf "vif_plug: Unable to plug VIF %s" - (Ref.string_of self) - ] - ) - ) + Helpers.internal_error "vif_plug: Unable to plug VIF %s" + (Ref.string_of self) ) let vif_set_locking_mode ~__context ~self = @@ -4257,16 +4159,8 @@ let vif_unplug ~__context ~self force = (* We need to make sure VIF.stat still works so: wait before calling VIF.remove *) Events_from_xenopsd.wait queue_name dbg (fst vif.Vif.id) () ; if Db.VIF.get_currently_attached ~__context ~self then - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf "vif_unplug: Unable to unplug VIF %s" - (Ref.string_of self) - ] - ) - ) + Helpers.internal_error "vif_unplug: Unable to unplug VIF %s" + (Ref.string_of self) with Xenopsd_error (Does_not_exist _) -> info "VIF is not plugged; setting currently_attached to false" ; Db.VIF.set_currently_attached ~__context ~self ~value:false @@ -4282,17 +4176,9 @@ let vif_move ~__context ~self _network = let backend = backend_of_vif ~__context ~vif:self in match backend with | Network.Sriov _ -> - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf - "vif_move: Unable to move a network SR-IOV backed VIF %s" - (Ref.string_of self) - ] - ) - ) + Helpers.internal_error + "vif_move: Unable to move a network SR-IOV backed VIF %s" + 
(Ref.string_of self) | _ -> let dbg = Context.string_of_task_and_tracing __context in let module Client = (val make_client queue_name : XENOPS) in @@ -4302,16 +4188,8 @@ let vif_move ~__context ~self _network = |> sync_with_task __context queue_name ; Events_from_xenopsd.wait queue_name dbg (fst vif.Vif.id) () ; if not (Db.VIF.get_currently_attached ~__context ~self) then - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf "vif_move: Unable to plug moved VIF %s" - (Ref.string_of self) - ] - ) - ) + Helpers.internal_error "vif_move: Unable to plug moved VIF %s" + (Ref.string_of self) ) let vif_set_ipv4_configuration ~__context ~self = @@ -4380,16 +4258,8 @@ let vusb_unplug_hvm ~__context ~self = Client.VUSB.unplug dbg vusb.Vusb.id |> sync_with_task __context queue_name ; Events_from_xenopsd.wait queue_name dbg (fst vusb.Vusb.id) () ; if Db.VUSB.get_currently_attached ~__context ~self then - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf "vusb_unplug: Unable to unplug VUSB %s" - (Ref.string_of self) - ] - ) - ) + Helpers.internal_error "vusb_unplug: Unable to unplug VUSB %s" + (Ref.string_of self) ) let vusb_plugable ~__context ~self = @@ -4400,16 +4270,8 @@ let vusb_unplug ~__context ~self = if vusb_plugable ~__context ~self then vusb_unplug_hvm ~__context ~self else - raise - Api_errors.( - Server_error - ( internal_error - , [ - Printf.sprintf "vusb_unplug: Unable to unplug vusb %s" - (Ref.string_of self) - ] - ) - ) + Helpers.internal_error "vusb_unplug: Unable to unplug vusb %s" + (Ref.string_of self) module Observer = struct let create ~__context ~uuid ~name_label ~attributes ~endpoints ~enabled = diff --git a/ocaml/xapi/xha_statefile.ml b/ocaml/xapi/xha_statefile.ml index abcae2d1697..54428684a44 100644 --- a/ocaml/xapi/xha_statefile.ml +++ b/ocaml/xapi/xha_statefile.ml @@ -116,16 +116,8 @@ let check_sr_can_host_statefile ~__context ~sr ~cluster_stack = with | [] -> (* This should never happen because 
the PBDs are plugged in *) - raise - (Api_errors.Server_error - ( Api_errors.internal_error - , [ - "SR does not have corresponding SM record" - ; Ref.string_of sr - ; srtype - ] - ) - ) + Helpers.internal_error "SR does not have corresponding SM record: %s %s" + (Ref.string_of sr) srtype | (_, sm) :: _ -> if (not (List.mem_assoc "VDI_GENERATE_CONFIG" sm.Db_actions.sM_features)) From fa929c4e3bf78fa6595ab16c48038bbd404d9534 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Tue, 11 Feb 2025 15:19:32 +0800 Subject: [PATCH 064/117] CA-403867: Block pool join if IP not configured on cluster network To join a host into a pool with cluster enabled, the host must have one and only one IP configured on the joining cluster network. If not, after the host joinied the pool, GFS2 SR cannot be plugged on the joined host because an IP is required in the cluster network. Pool join in this scenario has been blocked in XenCenter, here we will block it inside xapi. Signed-off-by: Gang Ji --- ocaml/idl/datamodel_errors.ml | 8 ++++ ocaml/xapi-consts/api_errors.ml | 6 +++ ocaml/xapi/xapi_pool.ml | 77 +++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+) diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml index bdd2e54a484..6dac4669fee 100644 --- a/ocaml/idl/datamodel_errors.ml +++ b/ocaml/idl/datamodel_errors.ml @@ -897,6 +897,14 @@ let _ = the pool coordinator. Make sure the sm are of the same versions and try \ again." 
() ; + error Api_errors.pool_joining_pool_cannot_enable_clustering_on_vlan_network + ["vlan"] ~doc:"The remote pool cannot enable clustering on vlan network" () ; + error Api_errors.pool_joining_host_must_have_only_one_IP_on_clustering_network + [] + ~doc: + "The host joining the pool must have one and only one IP on the \ + clustering network" + () ; (* External directory service *) error Api_errors.subject_cannot_be_resolved [] diff --git a/ocaml/xapi-consts/api_errors.ml b/ocaml/xapi-consts/api_errors.ml index a621a825a9c..790da3aeda7 100644 --- a/ocaml/xapi-consts/api_errors.ml +++ b/ocaml/xapi-consts/api_errors.ml @@ -757,6 +757,12 @@ let pool_joining_host_ca_certificates_conflict = let pool_joining_sm_features_incompatible = add_error "POOL_JOINING_SM_FEATURES_INCOMPATIBLE" +let pool_joining_pool_cannot_enable_clustering_on_vlan_network = + add_error "POOL_JOINING_POOL_CANNOT_ENABLE_CLUSTERING_ON_VLAN_NETWORK" + +let pool_joining_host_must_have_only_one_IP_on_clustering_network = + add_error "POOL_JOINING_HOST_MUST_HAVE_ONLY_ONE_IP_ON_CLUSTERING_NETWORK" + (*workload balancing*) let wlb_not_initialized = add_error "WLB_NOT_INITIALIZED" diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 05d50e97784..fdebcfcc003 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -112,6 +112,82 @@ let pre_join_checks ~__context ~rpc ~session_id ~force = ) ) in + let one_ip_configured_on_joining_cluster_network () = + match Client.Cluster_host.get_all ~rpc ~session_id with + | [] -> + () + | ch :: _ -> ( + let cluster = + Client.Cluster_host.get_cluster ~rpc ~session_id ~self:ch + in + match + Client.Cluster.get_pool_auto_join ~rpc ~session_id ~self:cluster + with + | false -> + () + | true -> ( + match Client.Cluster_host.get_PIF ~rpc ~session_id ~self:ch with + | pif when pif = Ref.null -> + () + | pif -> ( + match Client.PIF.get_VLAN ~rpc ~session_id ~self:pif with + | vlan when vlan > 0L -> + error + "Cannot join pool whose clustering is 
enabled on VLAN network" ; + raise + (Api_errors.Server_error + ( Api_errors + .pool_joining_pool_cannot_enable_clustering_on_vlan_network + , [Int64.to_string vlan] + ) + ) + | 0L | _ -> ( + let clustering_devices_in_pool = + ( match + Client.PIF.get_bond_master_of ~rpc ~session_id ~self:pif + with + | [] -> + [pif] + | bonds -> + List.concat_map + (fun bond -> + Client.Bond.get_slaves ~rpc ~session_id ~self:bond + ) + bonds + ) + |> List.map (fun self -> + Client.PIF.get_device ~rpc ~session_id ~self + ) + in + match + Db.Host.get_PIFs ~__context + ~self:(Helpers.get_localhost ~__context) + |> List.filter (fun p -> + List.exists + (fun d -> Db.PIF.get_device ~__context ~self:p = d) + clustering_devices_in_pool + && Db.PIF.get_IP ~__context ~self:p <> "" + ) + with + | [_] -> + () + | _ -> + error + "Cannot join pool as the joining host needs to have one \ + (and only one) IP address on the network that will be \ + used for clustering." ; + raise + (Api_errors.Server_error + ( Api_errors + .pool_joining_host_must_have_only_one_IP_on_clustering_network + , [] + ) + ) + ) + ) + ) + ) + in (* CA-26975: Pool edition MUST match *) let assert_restrictions_match () = let my_edition = @@ -888,6 +964,7 @@ let pre_join_checks ~__context ~rpc ~session_id ~force = assert_management_interface_exists () ; ha_is_not_enable_on_me () ; clustering_is_not_enabled_on_me () ; + one_ip_configured_on_joining_cluster_network () ; ha_is_not_enable_on_the_distant_pool () ; assert_not_joining_myself () ; assert_i_know_of_no_other_hosts () ; From a3f2c6e8f4edc089913b3f0208b0140ca0fef0c5 Mon Sep 17 00:00:00 2001 From: Colin James Date: Mon, 6 Jan 2025 14:42:24 +0000 Subject: [PATCH 065/117] CA-403744: Implement other_config operations Currently, users with read-only permissions can freely manipulate their own tasks (create, cancel, destroy, etc.). However, they cannot easily manipulate the "other_config" field. 
Some keys are specified in the datamodel as being writeable by subjects with the VM operator role. However, RBAC checking for keys was only ever implemented for the individual "add" and "remove" operations, not the "set_other_config" operation. This makes sense because the typical scenario is that the required writer role for an "other_config" field is more privileged than each of the individually-specified key-specific roles. In the case of tasks, the desired role for the set_other_config operation is the read-only role. However, we cannot simply demote the required "writer role" for the other_config field, because: 1) We must maintain key-based RBAC checks for the operation. For example, if a read-only user attempts to modify a task's other_config using set_other_config, the operation must fail if they've attempted to modify the value mapped to by some key that they are not privileged to use. 2) Key-based RBAC checking is special cased to "add_to" and "remove_from". You can attempt to ascribe key-related role restrictions to arbitrary messages but these will all fail at runtime, when invoked - because Rbac.check is special cased to expect to be able to extract out the key name from a "key" argument it receives, which is emitted for "add_to" and "remove_from". 3) There is an additional restriction that read-only users should not be able to modify arbitrary tasks, only their own. Both of these points require a custom implementation. To this end, we mark "other_config" as read-only (DynamicRO) and implement custom handlers for the "add_to", "remove_from", and "set" operations. In doing so, we implement a subset of the RBAC protection logic for keys. This custom implementation amounts to a relaxation of the usual RBAC rules: where "add_to" and "remove_from" (both purely destructive operations) cite a protected key (that they are not permitted to write), RBAC fails. 
In the custom implementation of "set_other_config", an under-privileged session can cite any key so long as their change is not destructive (it must preserve what is already there). Signed-off-by: Colin James --- ocaml/idl/datamodel.ml | 79 ++++++++++++- ocaml/idl/datamodel_lifecycle.ml | 6 + ocaml/idl/schematest.ml | 2 +- ocaml/xapi/rbac.ml | 117 +++++++++++-------- ocaml/xapi/rbac.mli | 20 ++++ ocaml/xapi/xapi_task.ml | 195 +++++++++++++++++++++++++++++++ 6 files changed, 366 insertions(+), 53 deletions(-) diff --git a/ocaml/idl/datamodel.ml b/ocaml/idl/datamodel.ml index 2ee5f705c22..5fc74d58fc2 100644 --- a/ocaml/idl/datamodel.ml +++ b/ocaml/idl/datamodel.ml @@ -551,6 +551,80 @@ module Task = struct let task_allowed_operations = Enum ("task_allowed_operations", List.map operation_enum [cancel; destroy]) + module Special = struct + (* These keys are usually ascribed to the field directly but, + since we are providing custom implementations, we ascribe them + to the messages themselves. + + Note that only the "add_to" and "remove_from" messages are + protected by these keys. This is because the current RBAC logic + is special cased to those messages. The "set_other_config" + message has a relaxed RBAC restriction by comparison, and its + checking logic is defined in terms of the permissions created + for the "add_to" and "remove_from" operations. + + The difference is subtle: if a session attempts to perform + "add_to"/"remove_from" upon "other_config", those operations + are purely destructive and RBAC checking can be done by the + current logic in Rbac.check (which guards the action). However, + in the case of "set_other_config", we relax the restriction and + must do the RBAC checking ourselves. The relaxed restriction is + that the call may maintain entries that it cannot change + itself. This means a read-only user can technically supply a + map containing privileged entries, so long as those entries are + already present. 
This allows read-only users to update a subset + of the entries within "other_config". + *) + let protected_keys = + [ + ("applies_to", _R_VM_OP) + ; ("XenCenterUUID", _R_VM_OP) + ; ("XenCenterMeddlingActionTitle", _R_VM_OP) + ] + + let call = call ~lifecycle:[] ~errs:[] ~allowed_roles:_R_READ_ONLY + + let add_to_other_config = + call ~name:"add_to_other_config" + ~doc: + "Add the given key-value pair to the other_config field of the given \ + task." + ~params: + [ + (Ref _task, "self", "Task object to modify") + ; (String, "key", "Key to add") + ; (String, "value", "Value to add") + ] + ~map_keys_roles:protected_keys () + + let remove_from_other_config = + call ~name:"remove_from_other_config" + ~doc: + "Remove the given key and its corresponding value from the \ + other_config field of the given task. If the key is not in that \ + Map, then do nothing." + ~params: + [ + (Ref _task, "self", "Task object to modify") + ; (String, "key", "Key of entry to remove") + ] + (* Privileged key permissions are generated for each of these protected keys. *) + ~map_keys_roles:protected_keys () + + (* We cannot cite the protected keys here as the current RBAC + logic only works for "add_to" and "remove_from" and, even if it + did, it is too strict. *) + let set_other_config = + call ~name:"set_other_config" + ~doc:"Set the other_config field of the given task." + ~params: + [ + (Ref _task, "self", "Task object to modify") + ; (Map (String, String), "value", "New value to set") + ] + () + end + let t = create_obj ~in_db:true ~lifecycle:[(Published, rel_rio, "A long-running asynchronous task")] @@ -567,6 +641,9 @@ module Task = struct ; set_progress ; set_result ; set_error_info + ; Special.add_to_other_config + ; Special.remove_from_other_config + ; Special.set_other_config ] ~contents: ([ @@ -716,7 +793,7 @@ module Task = struct "error_info" "if the task has failed, this field contains the set of \ associated error strings. Undefined otherwise." 
- ; field + ; field ~qualifier:DynamicRO ~lifecycle:[(Published, rel_miami, "additional configuration")] ~default_value:(Some (VMap [])) ~ty:(Map (String, String)) diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index d2f55d42dde..47b7b21010b 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -247,5 +247,11 @@ let prototyped_of_message = function Some "22.27.0" | "pool", "set_custom_uefi_certificates" -> Some "24.0.0" + | "task", "set_other_config" -> + Some "25.2.0-next" + | "task", "remove_from_other_config" -> + Some "25.2.0-next" + | "task", "add_to_other_config" -> + Some "25.2.0-next" | _ -> None diff --git a/ocaml/idl/schematest.ml b/ocaml/idl/schematest.ml index d17b6cf488a..25ff1933a65 100644 --- a/ocaml/idl/schematest.ml +++ b/ocaml/idl/schematest.ml @@ -3,7 +3,7 @@ let hash x = Digest.string x |> Digest.to_hex (* BEWARE: if this changes, check that schema has been bumped accordingly in ocaml/idl/datamodel_common.ml, usually schema_minor_vsn *) -let last_known_schema_hash = "05ac9223f9c17b07b12e328d5dc3db52" +let last_known_schema_hash = "e6b99e0d07ccf68df8f45c851e5d8dbf" let current_schema_hash : string = let open Datamodel_types in diff --git a/ocaml/xapi/rbac.ml b/ocaml/xapi/rbac.ml index 2a8555cc9a9..121289204db 100644 --- a/ocaml/xapi/rbac.ml +++ b/ocaml/xapi/rbac.ml @@ -161,80 +161,95 @@ let is_permission_in_session ~session_id ~permission ~session = open Db_actions -(* look up the list generated in xapi_session.get_permissions *) -let is_access_allowed ~__context ~session_id ~permission = - (* always allow local system access *) - if Session_check.is_local_session __context session_id then - true (* normal user session *) +(* Given a list of permissions, determine if the given session is + permitted to perform the related actions. If not, stop and return the + first disallowed permission. 
This stops us doing redundant checks + but also is consistent with the current RBAC error reporting, where + a single action is usually reported. *) +let find_first_disallowed_permission ~__context ~session_id ~permissions = + let is_local_session () = + Session_check.is_local_session __context session_id + in + let doesn't_have_permission session permission = + is_permission_in_session ~session_id ~permission ~session = false + in + (* Test session properties before querying permission sets. *) + if is_local_session () then + None else let session = DB_Action.Session.get_record ~__context ~self:session_id in - (* the root user can always execute anything *) if session.API.session_is_local_superuser then - true - (* not root user, so let's decide if permission is allowed or denied *) + None else - is_permission_in_session ~session_id ~permission ~session + List.find_opt (doesn't_have_permission session) permissions + +(* Determine if session has a given permission. *) +let is_access_allowed ~__context ~session_id ~permission = + find_first_disallowed_permission ~__context ~session_id + ~permissions:[permission] + |> Option.is_none + +let get_session_of_context ~__context ~permission = + try Context.get_session_id __context + with Failure _ -> + let msg = "no session in context" in + raise Api_errors.(Server_error (rbac_permission_denied, [permission; msg])) + +let disallowed_permission_exn ?(extra_dmsg = "") ?(extra_msg = "") ~__context + ~permission ~action = + let session_id = get_session_of_context ~__context ~permission in + let allowed_roles = + try + Xapi_role.get_by_permission_name_label ~__context ~label:permission + |> List.map (fun self -> Xapi_role.get_name_label ~__context ~self) + |> String.concat ", " + with e -> + debug "Could not obtain allowed roles for %s (%s)" permission + (ExnHelper.string_of_exn e) ; + "" + in + let msg = + Printf.sprintf + "No permission in user session. 
(Roles with this permission: %s)%s" + allowed_roles extra_msg + in + debug "%s[%s]: %s %s %s" action permission msg (trackid session_id) extra_dmsg ; + raise Api_errors.(Server_error (rbac_permission_denied, [permission; msg])) -(* Execute fn if rbac access is allowed for action, otherwise fails. *) let nofn () = () let check ?(extra_dmsg = "") ?(extra_msg = "") ?args ?(keys = []) ~__context ~fn session_id action = let permission = permission_of_action action ?args ~keys in - if is_access_allowed ~__context ~session_id ~permission then ( - (* allow access to action *) + let allow_access () = + (* Allow access to action. *) let sexpr_of_args = Rbac_audit.allowed_pre_fn ~__context ~action ?args () in try - let result = fn () (* call rbac-protected function *) in + (* Call the RBAC-protected function. *) + let result = fn () in Rbac_audit.allowed_post_fn_ok ~__context ~session_id ~action ~permission ?sexpr_of_args ?args ~result () ; result with error -> + (* Catch all exceptions and log to RBAC audit log. *) Backtrace.is_important error ; - (* catch all exceptions *) Rbac_audit.allowed_post_fn_error ~__context ~session_id ~action ~permission ?sexpr_of_args ?args ~error () ; + (* Re-raise. *) raise error - ) else (* deny access to action *) - let allowed_roles_string = - try - let allowed_roles = - Xapi_role.get_by_permission_name_label ~__context ~label:permission - in - List.fold_left - (fun acc allowed_role -> - acc - ^ (if acc = "" then "" else ", ") - ^ Xapi_role.get_name_label ~__context ~self:allowed_role - ) - "" allowed_roles - with e -> - debug "Could not obtain allowed roles for %s (%s)" permission - (ExnHelper.string_of_exn e) ; - "" - in - let msg = - Printf.sprintf - "No permission in user session. (Roles with this permission: %s)%s" - allowed_roles_string extra_msg - in - debug "%s[%s]: %s %s %s" action permission msg (trackid session_id) - extra_dmsg ; + in + let deny_access () = + (* Deny access to action, raising an exception. 
*) Rbac_audit.denied ~__context ~session_id ~action ~permission ?args () ; raise - (Api_errors.Server_error - (Api_errors.rbac_permission_denied, [permission; msg]) - ) - -let get_session_of_context ~__context ~permission = - try Context.get_session_id __context - with Failure _ -> - raise - (Api_errors.Server_error - ( Api_errors.rbac_permission_denied - , [permission; "no session in context"] - ) + (disallowed_permission_exn ~extra_dmsg ~extra_msg ~__context ~permission + ~action ) + in + if is_access_allowed ~__context ~session_id ~permission then + allow_access () + else + deny_access () let assert_permission_name ~__context ~permission = let session_id = get_session_of_context ~__context ~permission in diff --git a/ocaml/xapi/rbac.mli b/ocaml/xapi/rbac.mli index 6905379a311..8ee7ad092ff 100644 --- a/ocaml/xapi/rbac.mli +++ b/ocaml/xapi/rbac.mli @@ -22,6 +22,26 @@ val is_access_allowed : (on the coordinator only) to benefit successive queries for the same session. *) +val find_first_disallowed_permission : + __context:Context.t + -> session_id:[`session] Ref.t + -> permissions:string list + -> string option +(** Given a list of permissions, determine if the given session is + permitted to perform the related actions. If not, stop and return + the first disallowed permssion (without considering the remaining ones). *) + +val disallowed_permission_exn : + ?extra_dmsg:string + -> ?extra_msg:string + -> __context:Context.t + -> permission:string + -> action:string + -> exn +(** Create an RBAC_PERMISSION_DENIED exception for the given + permission. Attempts to report the role(s) which do have the given + permission (if any). 
*) + val check : ?extra_dmsg:string -> ?extra_msg:string diff --git a/ocaml/xapi/xapi_task.ml b/ocaml/xapi/xapi_task.ml index aef42c01593..8c6ebf00a41 100644 --- a/ocaml/xapi/xapi_task.ml +++ b/ocaml/xapi/xapi_task.ml @@ -88,3 +88,198 @@ let set_resident_on ~__context ~self ~value = Context.with_tracing ~__context __FUNCTION__ @@ fun __context -> TaskHelper.assert_op_valid ~__context self ; Db.Task.set_resident_on ~__context ~self ~value + +(* Simple trie data structure that performs a favoured lookup to + implement a simple form of wildcard key matching. The trie is not + pruned during (or after) construction. *) +module MatchTrie = struct + type 'a node = {arrows: (string, 'a node) Hashtbl.t; mutable value: 'a option} + + let create_node () = + let arrows = Hashtbl.create 16 in + let value = None in + {arrows; value} + + let create = create_node + + let insert root ~key ~value = + let parts = String.split_on_char '.' key in + let rec extend focused = function + | part :: parts -> + let next = + match Hashtbl.find_opt focused.arrows part with + | Some node -> + node + | _ -> + let next = create_node () in + Hashtbl.replace focused.arrows part next ; + next + in + extend next parts + | [] -> + focused + in + let final = extend root parts in + final.value <- Some value + + let find root ~key = + let parts = String.split_on_char '.' key in + let rec find focused = function + | part :: parts -> ( + (* Wildcard edges override other edges. *) + match Hashtbl.find_opt focused.arrows "*" with + | Some _ as sink -> + sink + | _ -> ( + match Hashtbl.find_opt focused.arrows part with + | Some next -> + (find [@tailcall]) next parts + | _ -> + None + ) + ) + | _ -> + Some focused + in + match find root parts with Some node -> node.value | _ -> None +end + +(* Given an input key, compare against the protected keys of the + task.other_config field. If a protected key matches, return it. 
+ + For example, if the datamodel specifies "foo.bar.*" as a protected + key, then: match_protected_key ~key:"foo.bar.baz" = Some "foo.bar.*". + + It must return the protected key as that is what key-related RBAC + entries are defined in terms of. +*) +let match_protected_key = + (* Attain the listing of protected keys from the datamodel at module + initialisation. Usually, this list is passed to Rbac.check by + handlers inside the auto-generated server.ml file. *) + let protected_keys = + let api = Datamodel.all_api in + let field = + Dm_api.get_field_by_name api ~objname:"task" ~fieldname:"other_config" + in + List.map fst field.field_map_keys_roles + in + (* Define the lookup function in terms of a simple trie data + structure - which is flexible to account for overlapping paths and + presence of wildcards. *) + let trie = + let root = MatchTrie.create () in + let add key = MatchTrie.insert root ~key ~value:key in + List.iter add protected_keys ; + root + in + MatchTrie.find trie + +let assert_can_modify_other_config ~__context ~task = + TaskHelper.assert_op_valid ~__context task + +let add_to_other_config ~__context ~self ~key ~value = + assert_can_modify_other_config ~__context ~task:self ; + Db.Task.add_to_other_config ~__context ~self ~key ~value + +let remove_from_other_config ~__context ~self ~key = + assert_can_modify_other_config ~__context ~task:self ; + Db.Task.remove_from_other_config ~__context ~self ~key + +(* The behaviour of this function, with respect to RBAC checking, must + match serial "remove_from" and "add_to" operations (for only the keys + that are changing). + + There is normally no key-related RBAC checking for + "set_other_config" because the required writer role for the entire + field is usually higher than the role(s) required for + individually-protected keys. 
+ + Task's "set_other_config" is a special case where read-only + sessions must be able to manipulate a subset of entries (those not + protected by a more privileged role), along with this capability + being restricted to only the task objects that they created. +*) +let set_other_config ~__context ~self ~value = + let module S = Set.Make (String) in + assert_can_modify_other_config ~__context ~task:self ; + let create_lookup kvs = + let table = List.to_seq kvs |> Hashtbl.of_seq in + Hashtbl.find_opt table + in + let old_value = Db.Task.get_other_config ~__context ~self in + let lookup_old, lookup_new = (create_lookup old_value, create_lookup value) in + let keys_before, keys_after = + let keys = List.map fst in + let before = keys old_value in + let after = keys value in + S.(of_list before, of_list after) + in + let keys_removed = + (* Keys no longer appearing in the map. The user must have the + "remove_from" role for each of the protected keys in the set. *) + S.diff keys_before keys_after + in + let keys_unchanged = + (* Keys that persist across the update. If any key in this set is + protected AND the value mapped to by the key would be changed by + the update, the session must have the "add_to" role. *) + let updated = S.inter keys_before keys_after in + let is_entry_unchanged key = + let is_same = + let ( let* ) = Option.bind in + let* old_value = lookup_old key in + let* new_value = lookup_new key in + Some (old_value = new_value) + in + Option.value ~default:false is_same + in + (* Filter out the unchanged entries, as you don't need any + extra privileges to maintain an entry that's already there. *) + S.filter is_entry_unchanged updated + in + let keys_added = + (* Treat all keys as new, unless they're referring to entries that + are unchanged across the update. *) + S.diff keys_after keys_unchanged + in + let permissions = + (* Map each of the added and removed keys to protected keys, if + such a key exists. 
*) + let filter keys = + S.filter_map (fun key -> match_protected_key ~key) keys |> S.elements + in + let added = filter keys_added in + let removed = filter keys_removed in + let format operation key = + (* All the permissions are stored in lowercase. *) + let key = String.lowercase_ascii key in + Printf.sprintf "task.%s_other_config/key:%s" operation key + in + (* The required permissions are defined in terms of those + generated for "add_to" and "remove_from" (both implemented + above). They can be defined as custom AND use RBAC checking within + server.ml because their operation is purely destructive, so it's + sufficient to guard the entire action with Rbac.check. *) + let added_perms = List.map (format "add_to") added in + let removed_perms = List.map (format "remove_from") removed in + added_perms @ removed_perms + in + (* Find the first disallowed permission, indicating that we cannot + perform the action. *) + let session_id = Context.get_session_id __context in + match + Rbac.find_first_disallowed_permission ~__context ~session_id ~permissions + with + | None -> + (* No disallowed permission, perform the update. *) + Db.Task.set_other_config ~__context ~self ~value + | Some disallowed -> + (* Report it as an RBAC error. *) + let action = "task.set_other_config" in + let extra_msg = "" in + let extra_dmsg = "" in + raise + (Rbac.disallowed_permission_exn ~extra_dmsg ~extra_msg ~__context + ~permission:disallowed ~action + ) From c608902e910e340a4d2eefbed329737bb631651f Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Mon, 10 Feb 2025 17:03:42 +0000 Subject: [PATCH 066/117] CP-45795: Decompress compressed trace files without Forkexecd This allows an xs_trace.exe to be run on any linux machine, whereas previously the use of forkexecd meant that it had to be run on a XS host. 
Signed-off-by: Steven Woods --- ocaml/xs-trace/xs_trace.ml | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/ocaml/xs-trace/xs_trace.ml b/ocaml/xs-trace/xs_trace.ml index e51847c9256..6360649fb20 100644 --- a/ocaml/xs-trace/xs_trace.ml +++ b/ocaml/xs-trace/xs_trace.ml @@ -20,7 +20,7 @@ module Exporter = struct if json <> "" then match Tracing_export.Destination.Http.export ~url json with | Error err -> - Printf.eprintf "Error: %s" (Printexc.to_string err) ; + Printf.eprintf "Error: %s\n" (Printexc.to_string err) ; exit 1 | _ -> () @@ -34,18 +34,17 @@ module Exporter = struct (* Recursively export trace files. *) Sys.readdir path |> Array.iter (fun f -> Filename.concat path f |> export_file) - | path when Filename.check_suffix path ".zst" -> - (* Decompress compressed trace file and decide whether to - treat it as line-delimited or not. *) - let ( let@ ) = ( @@ ) in - let@ compressed = Unixext.with_file path [O_RDONLY] 0o000 in - let@ decompressed = Zstd.Fast.decompress_passive compressed in - if Filename.check_suffix path ".ndjson.zst" then - let ic = Unix.in_channel_of_descr decompressed in - Unixext.lines_iter submit_json ic - else - let json = Unixext.string_of_fd decompressed in - submit_json json + | path when Filename.check_suffix path ".zst" -> ( + (* Decompress compressed trace file and submit each line iteratively *) + let args = [|"zstdcat"; path|] in + let ic = Unix.open_process_args_in args.(0) args in + Unixext.lines_iter submit_json ic ; + match Unix.close_process_in ic with + | Unix.WEXITED 0 -> + () + | _ -> + Printf.eprintf "File %s exited with non-zero\n" path + ) | path when Filename.check_suffix path ".ndjson" -> (* Submit traces line by line. 
*) Unixext.readfile_line submit_json path From 3c53bdc45362d1fe1a66d941bfd6a696a18f5604 Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Tue, 18 Feb 2025 07:09:21 +0000 Subject: [PATCH 067/117] CP-53362: Rename hcp_nss to nss_override_id hcp_nss is a nss module to override the uid/gid of pooladmin when they ssh into dom0, as dom0 only support one single user However, the name wants to be updated to nss_override_id to reflect its usage Signed-off-by: Lin Liu --- python3/plugins/extauth-hook-AD.py | 2 +- python3/{plugins => tests}/test_extauth_hook_AD.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename python3/{plugins => tests}/test_extauth_hook_AD.py (99%) diff --git a/python3/plugins/extauth-hook-AD.py b/python3/plugins/extauth-hook-AD.py index d3e89aae8c8..d867cb9c7f9 100755 --- a/python3/plugins/extauth-hook-AD.py +++ b/python3/plugins/extauth-hook-AD.py @@ -332,7 +332,7 @@ def __init__(self, session, args, ad_enabled=True): "/etc/nsswitch.conf", session, args, ad_enabled) modules = "files sss" if ad_enabled: - modules = "files hcp winbind" + modules = "files override_id winbind" self._update_key_value("passwd", modules) self._update_key_value("group", modules) self._update_key_value("shadow", modules) diff --git a/python3/plugins/test_extauth_hook_AD.py b/python3/tests/test_extauth_hook_AD.py similarity index 99% rename from python3/plugins/test_extauth_hook_AD.py rename to python3/tests/test_extauth_hook_AD.py index eb9d1107e87..11b24154ed7 100644 --- a/python3/plugins/test_extauth_hook_AD.py +++ b/python3/tests/test_extauth_hook_AD.py @@ -201,7 +201,7 @@ def test_ad_not_enabled(self, mock_install): self.assertTrue(line_exists_in_config(nss._lines, expected_config)) def test_ad_enabled(self, mock_install): - expected_config = "passwd: files hcp winbind" + expected_config = "passwd: files override_id winbind" nss = NssConfig(mock_session, args_bd_winbind, True) nss.apply() self.assertTrue(line_exists_in_config(nss._lines, expected_config)) From 
eb06992c8f494675e7f96ea70da0d06bf0373b3e Mon Sep 17 00:00:00 2001 From: Colin James Date: Wed, 22 Jan 2025 16:13:17 +0000 Subject: [PATCH 068/117] Restructure valid allowed_operations computation In an effort to make the logic clearer, we restructure the part(s) of Xapi_pool_helpers responsible for determining which operations are "valid" (i.e. become members of allowed_operations). The previous code is somewhat tedious to understand because it is unconditionally ineffective - the logical delineation of parts of the code is implicit, as the code computes the valid operation table in order, but many of the operations will have no effect (as later code to populate an operation's entry in the validity table do nothing). To try and simplify matters, we add some level of static partitioning of "blocking" and "waiting" operations (using separate polymorphic variants and coercions to widen into a type comprising all operations). Then, we replace loops with "find first" computations. The current logic should be clearer. It is roughly as follows: To compute the "valid" operations: - Start by assuming all operations are valid. We explicitly map each operation to an "Allowed" constructor to signify this. Though, technically, full coverage of the cases would guarantee that absence from this table implies the associated operation is valid - however, we maintain the old concern and maintain validity entries as a tri-state value (Unknown, Allowed, and Disallowed). - Determine the current operations reported by the pool object. At present, every operation is statically partitioned into "blocking" or "waiting" operations - which are both handled differently, with a "blocking" operation taking highest precedence - If there's an operation in current operations that is a blocking operation, this takes precedence. 
We cover all operation cases as follows: (1) blocking operations get
marked as being invalid and cite the reason associated with the
blocking operation discovered in the current operations set (which is
an operation-specific "in progress" error). Then, all waiting
operations get marked as invalid, citing a generic "in progress" error
(unrelated to any specific operation).

- If there's no blocking operation in current operations, but there is
  a waiting operation, we map all operations to a generic "in
  progress" error.

- If there is no blocking or waiting operation in current operations
  (which, at present, is to say that current operations is empty),
  then we invalidate entries based on specific, hardcoded invariants.
  For example, if HA is enabled on the pool, we invalidate the
  `ha_enable` operation on the pool object (with the reason explicitly
  explaining that HA is already enabled).

In future, we could consider encoding the relations between operations
(and related object state) declaratively, such that we can either
automatically generate such code (and test it alongside invariants,
encoded as Prolog-esque rules) or use an incremental computation
framework to automatically recompute the allowed operations based on
changes in ambient pool state. 
Signed-off-by: Colin James --- ocaml/xapi/xapi_pool_helpers.ml | 228 ++++++++++++++++++++------------ 1 file changed, 144 insertions(+), 84 deletions(-) diff --git a/ocaml/xapi/xapi_pool_helpers.ml b/ocaml/xapi/xapi_pool_helpers.ml index 2a3aedaf4f0..14f4c37d030 100644 --- a/ocaml/xapi/xapi_pool_helpers.ml +++ b/ocaml/xapi/xapi_pool_helpers.ml @@ -20,6 +20,30 @@ open Record_util let finally = Xapi_stdext_pervasives.Pervasiveext.finally +type blocking_operations = + [ `apply_updates + | `cluster_create + | `configure_repositories + | `designate_new_master + | `ha_disable + | `ha_enable + | `sync_bundle + | `sync_updates + | `tls_verification_enable ] + +type waiting_operations = + [ `cert_refresh + | `copy_primary_host_certs + | `eject + | `exchange_ca_certificates_on_join + | `exchange_certificates_on_join + | `get_updates ] + +type all_operations = [blocking_operations | waiting_operations] + +(* Unused, ensure every API operation is statically partitioned here. *) +let _id (op : API.pool_allowed_operations) : all_operations = op + (* psr is not included as a pool op because it can be considered in progress in between api calls (i.e. 
wrapping it inside with_pool_operation won't work) *) @@ -27,7 +51,7 @@ let finally = Xapi_stdext_pervasives.Pervasiveext.finally * a) throw an error if any other blocked op is in progress * b) wait if only a wait op is in progress *) -let blocking_ops = +let blocking_ops_table : (blocking_operations * string) list = [ (`ha_enable, Api_errors.ha_enable_in_progress) ; (`ha_disable, Api_errors.ha_disable_in_progress) @@ -45,7 +69,7 @@ let blocking_ops = * * waiting is symmetric: if `ha_enable is in progress, and we want to perform * `copy_primary_host_certs, then we wait in this case too *) -let wait_ops = +let waiting_ops : waiting_operations list = [ `cert_refresh ; `exchange_certificates_on_join @@ -55,107 +79,143 @@ let wait_ops = ; `get_updates ] -let all_operations = blocking_ops |> List.map fst |> List.append wait_ops +(* Shadow with widening coercions to allow us to query using + operations from either set, whilst maintaining the static guarantees + of the original listings. *) +let blocking_ops_table : (all_operations * string) list = + List.map (fun (op, v) -> ((op :> all_operations), v)) blocking_ops_table + +let blocking_ops : all_operations list = List.map fst blocking_ops_table -(* see [Helpers.retry]. this error code causes a 'wait' *) -let wait_error = Api_errors.other_operation_in_progress +let waiting_ops = List.map (fun op -> (op :> all_operations)) waiting_ops -(** Returns a table of operations -> API error options (None if the operation would be ok) *) -let valid_operations ~__context record (pool : API.ref_pool) = +let all_operations : all_operations list = blocking_ops @ waiting_ops + +type validity = Unknown | Allowed | Disallowed of string * string list + +(* Computes a function (all_operations -> validity) that maps each + element of all_operations to a value indicating whether it would be + valid for it to be executed in the inputted execution context. 
*) +let compute_valid_operations ~__context record pool : + API.pool_allowed_operations -> validity = let ref = Ref.string_of pool in let current_ops = List.map snd record.Db_actions.pool_current_operations in - let table = Hashtbl.create 10 in - all_operations |> List.iter (fun x -> Hashtbl.replace table x None) ; - let set_errors (code : string) (params : string list) - (ops : API.pool_allowed_operations_set) = - List.iter - (fun op -> - if Hashtbl.find table op = None then - Hashtbl.replace table op (Some (code, params)) - ) - ops + let table = (Hashtbl.create 32 : (all_operations, validity) Hashtbl.t) in + let set_validity = Hashtbl.replace table in + (* Start by assuming all operations are allowed. *) + List.iter (fun op -> set_validity op Allowed) all_operations ; + (* Given a list of operations, map each to the given error. If an + error has already been specified for a given operation, do + nothing. *) + let set_errors ops ((error, detail) : string * string list) = + let populate op = + match Hashtbl.find table op with + | Allowed -> + set_validity op (Disallowed (error, detail)) + | Disallowed _ | Unknown -> + (* These cases should be impossible here. 
*) + () + in + List.iter populate ops in - if current_ops <> [] then ( - List.iter - (fun (blocking_op, err) -> - if List.mem blocking_op current_ops then ( - set_errors err [] (blocking_ops |> List.map fst) ; - set_errors Api_errors.other_operation_in_progress - [Datamodel_common._pool; ref] - wait_ops - ) - ) - blocking_ops ; - List.iter - (fun wait_op -> - if List.mem wait_op current_ops then - set_errors wait_error [Datamodel_common._pool; ref] all_operations - ) - wait_ops - ) ; - (* HA disable cannot run if HA is already disabled on a pool *) - (* HA enable cannot run if HA is already enabled on a pool *) - let ha_enabled = - Db.Pool.get_ha_enabled ~__context ~self:(Helpers.get_pool ~__context) + let other_operation_in_progress = + (Api_errors.other_operation_in_progress, [Datamodel_common._pool; ref]) in - let current_stack = - Db.Pool.get_ha_cluster_stack ~__context ~self:(Helpers.get_pool ~__context) + let is_current_op = Fun.flip List.mem current_ops in + let blocking = + List.find_opt (fun (op, _) -> is_current_op op) blocking_ops_table in - if ha_enabled then ( - set_errors Api_errors.ha_is_enabled [] [`ha_enable] ; - (* TLS verification is not allowed to run if HA is enabled *) - set_errors Api_errors.ha_is_enabled [] [`tls_verification_enable] - ) else - set_errors Api_errors.ha_not_enabled [] [`ha_disable] ; - (* cluster create cannot run during a rolling pool upgrade *) - if Helpers.rolling_upgrade_in_progress ~__context then ( - set_errors Api_errors.not_supported_during_upgrade [] [`cluster_create] ; - set_errors Api_errors.not_supported_during_upgrade [] - [`tls_verification_enable] - ) ; - (* cluster create cannot run if a cluster already exists on the pool *) - ( match Db.Cluster.get_all ~__context with - | [_] -> - set_errors Api_errors.cluster_already_exists [] [`cluster_create] - (* indicates a bug or a need to update this code (if we ever support multiple clusters in the pool *) - | _ :: _ -> - failwith "Multiple clusters exist in the pool" 
- (* cluster create cannot run if ha is already enabled *) - | [] -> - if ha_enabled then - set_errors Api_errors.incompatible_cluster_stack_active [current_stack] - [`cluster_create] + let waiting = List.find_opt is_current_op waiting_ops in + ( match (blocking, waiting) with + | Some (_, reason), _ -> + (* Mark all potentially blocking operations as invalid due + to the specific blocking operation's "in progress" error. *) + set_errors blocking_ops (reason, []) ; + (* Mark all waiting operations as invalid for the generic + "OTHER_OPERATION_IN_PROGRESS" reason. *) + set_errors waiting_ops other_operation_in_progress + (* Note that all_operations ⊆ blocking_ops ∪ waiting_ops, so this + invalidates all operations (with the reason partitioned + between whether the operation is blocking or waiting). *) + | None, Some _ -> + (* If there's no blocking operation in current operations, but + there is a waiting operation, invalidate all operations for the + generic reason. Again, this covers every operation. *) + set_errors all_operations other_operation_in_progress + | None, None -> ( + (* If there's no blocking or waiting operation in current + operations (i.e. current operations is empty), we can report + more precise reasons why operations would be invalid. *) + let ha_enabled, current_stack = + let self = Helpers.get_pool ~__context in + Db.Pool. + ( get_ha_enabled ~__context ~self + , get_ha_cluster_stack ~__context ~self + ) + in + if ha_enabled then ( + (* Can't enable HA if it's already enabled. *) + let ha_is_enabled = (Api_errors.ha_is_enabled, []) in + set_errors [`ha_enable] ha_is_enabled ; + (* TLS verification is not allowed to run if HA is enabled. *) + set_errors [`tls_verification_enable] ha_is_enabled + ) else (* Can't disable HA if it's not enabled. *) + set_errors [`ha_disable] (Api_errors.ha_not_enabled, []) ; + (* Cluster create cannot run during a rolling pool upgrade. 
*) + if Helpers.rolling_upgrade_in_progress ~__context then ( + let not_supported_during_upgrade = + (Api_errors.not_supported_during_upgrade, []) + in + set_errors [`cluster_create] not_supported_during_upgrade ; + set_errors [`tls_verification_enable] not_supported_during_upgrade + ) ; + (* Cluster create cannot run if a cluster already exists on the pool. *) + match Db.Cluster.get_all ~__context with + | [_] -> + set_errors [`cluster_create] (Api_errors.cluster_already_exists, []) + (* Indicates a bug or a need to update this code (if we ever support multiple clusters in the pool). *) + | _ :: _ -> + failwith "Multiple clusters exist in the pool" + (* Cluster create cannot run if HA is already enabled. *) + | [] -> + if ha_enabled then + let error = + (Api_errors.incompatible_cluster_stack_active, [current_stack]) + in + set_errors [`cluster_create] error + ) ) ; - table - -let throw_error table op = - match Hashtbl.find_opt table op with - | None -> - Helpers.internal_error - "xapi_pool_helpers.assert_operation_valid unknown operation: %s" - (pool_allowed_operations_to_string op) - | Some (Some (code, params)) -> - raise (Api_errors.Server_error (code, params)) - | Some None -> - () + fun op -> Hashtbl.find_opt table op |> Option.value ~default:Unknown let assert_operation_valid ~__context ~self ~(op : API.pool_allowed_operations) = - (* no pool operations allowed during a pending PSR *) + (* No pool operations allowed during a pending PSR. 
*) if Db.Pool.get_is_psr_pending ~__context ~self:(Helpers.get_pool ~__context) then raise Api_errors.(Server_error (pool_secret_rotation_pending, [])) ; let all = Db.Pool.get_record_internal ~__context ~self in - let table = valid_operations ~__context all self in - throw_error table op + let lookup = compute_valid_operations ~__context all self in + match lookup op with + | Allowed -> + () + | Disallowed (error, detail) -> + raise (Api_errors.Server_error (error, detail)) + | Unknown -> + (* This should never happen and implies our validity algorithm is incomplete. *) + let detail = + let op = pool_allowed_operations_to_string op in + Printf.sprintf "%s.%s unknown operation: %s" __MODULE__ __FUNCTION__ op + in + raise Api_errors.(Server_error (internal_error, [detail])) let update_allowed_operations ~__context ~self : unit = let all = Db.Pool.get_record_internal ~__context ~self in - let valid = valid_operations ~__context all self in - let keys = - Hashtbl.fold (fun k v acc -> if v = None then k :: acc else acc) valid [] + let is_allowed_op = + let lookup = compute_valid_operations ~__context all self in + fun op -> lookup op = Allowed in - Db.Pool.set_allowed_operations ~__context ~self ~value:keys + let value = List.filter is_allowed_op all_operations in + Db.Pool.set_allowed_operations ~__context ~self ~value (** Add to the Pool's current operations, call a function and then remove from the current operations. Ensure the allowed_operations are kept up to date. *) From 41b84af55e42499a1982adc92fa9ccba14939374 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Wed, 19 Feb 2025 10:42:18 +0000 Subject: [PATCH 069/117] CP-52365 adjust interface to dmv-utils * We currently have a mock implementation for driver-tool from the dmv-utils package. Adjust the command line argument structure to the real implementation to make the switch over easy. * Install the mock implementation if the real implementation is not in place. We might want to remove this later. 
Currently xapi.spec does not list the dependency. Signed-off-by: Christian Lindig --- ocaml/xapi/xapi_globs.ml | 2 +- ocaml/xapi/xapi_host_driver.ml | 9 ++- ocaml/xapi/xapi_host_driver_tool.ml | 91 +++++++++++++++++------------ 3 files changed, 62 insertions(+), 40 deletions(-) diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 8d9f35655d0..a015535dd85 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -933,7 +933,7 @@ let xl_cmd = ref "/usr/sbin/xl" let depmod = ref "/usr/sbin/depmod" -let driver_tool = ref "/opt/xensource/debug/drivertool.sh" +let driver_tool = ref "/usr/sbin/driver-tool" let dracut = ref "/usr/bin/dracut" diff --git a/ocaml/xapi/xapi_host_driver.ml b/ocaml/xapi/xapi_host_driver.ml index a4061a7f9f0..0416b1de33f 100644 --- a/ocaml/xapi/xapi_host_driver.ml +++ b/ocaml/xapi/xapi_host_driver.ml @@ -74,7 +74,8 @@ module Variant = struct if v.API.driver_variant_hardware_present = false then no_hardware (Ref.string_of self) ; let stdout = - Tool.call ["select"; d.API.host_driver_name; v.API.driver_variant_name] + Tool.call + ["-s"; "-n"; d.API.host_driver_name; "-v"; v.API.driver_variant_name] in info "%s: %s" __FUNCTION__ stdout ; Db.Host_driver.set_selected_variant ~__context ~self:drv ~value:self @@ -171,10 +172,12 @@ let remove ~__context ~host ~except = (** Runs on [host]. We update or create an entry for each driver reported by drivertool and remove any extra driver that is in xapi. 
*) let scan ~__context ~host = - Tool.Mock.install () ; + let path = !Xapi_globs.driver_tool in + (* if the real tool is not installed, install a mock *) + if not (Sys.file_exists path) then Tool.Mock.install () ; let null = Ref.null in let drivers (* on this host *) = - Tool.call ["list"] + Tool.call ["-l"] |> Tool.parse |> List.map @@ fun (_name, driver) -> let driver_ref = diff --git a/ocaml/xapi/xapi_host_driver_tool.ml b/ocaml/xapi/xapi_host_driver_tool.ml index dd719dcd7ec..80fe5d208b1 100644 --- a/ocaml/xapi/xapi_host_driver_tool.ml +++ b/ocaml/xapi/xapi_host_driver_tool.ml @@ -242,26 +242,6 @@ module Mock = struct set -o errexit set -o pipefail -if [[ -n "$TRACE" ]]; then set -o xtrace; fi -set -o nounset - -if [[ "${1-}" =~ ^-*h(elp)?$ ]]; then - cat <&2 #>&2 redirects error message to stderr + exit 1 + ;; + :) # Missing argument for option + echo "Option -$OPTARG requires an argument." >&2 + exit 1 + ;; + esac +done + +# Shift the remaining positional parameters (if any) +shift $((OPTIND - 1)) + +# We don't properly prevent illegal combinations because this is just a +# mock. So we recognise -l first. 
+if $l_flag; then + list + exit 0 +fi -case "$1" in - list) - list - ;; - select) - selection "$2" "$3" - ;; - deselect) - deselect "$2" - ;; - *) - echo "unknown command $1" 2>&1 +if $s_flag; then + if [ -z "$n_value" ]; then + echo "missing -n" >&2 + exit 1 + fi + if [ -z "$v_value" ]; then + echo "missing -v" >&2 exit 1 - ;; -esac + fi + + selection "$n_value" "$v_value" + exit 0 +fi |} let install () = @@ -645,6 +664,6 @@ esac Xapi_stdext_unix.Unixext.write_string_to_file path drivertool_sh ; Unix.chmod path 0o755 with e -> - Helpers.internal_error ~log_err:true "%s: can't install %s: %s" - __FUNCTION__ path (Printexc.to_string e) + Helpers.internal_error "%s: can't install %s: %s" __FUNCTION__ path + (Printexc.to_string e) end From 41d458ecaef3b83ee08a840e85041cd0ec9388d9 Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Fri, 21 Feb 2025 12:00:00 +0000 Subject: [PATCH 070/117] (docs) Mention that wait_xen_free_mem aborts on physinfo.scrub_pages==0 Signed-off-by: Bernhard Kaindl --- .../xenopsd/walkthroughs/VM.build/Domain.build.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/doc/content/xenopsd/walkthroughs/VM.build/Domain.build.md b/doc/content/xenopsd/walkthroughs/VM.build/Domain.build.md index 7d1152578b5..ba4274e243a 100644 --- a/doc/content/xenopsd/walkthroughs/VM.build/Domain.build.md +++ b/doc/content/xenopsd/walkthroughs/VM.build/Domain.build.md @@ -62,7 +62,20 @@ to call: 1. [Call](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L902-L911) [wait_xen_free_mem](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L236-L272) - to wait (if necessary), for the Xen memory scrubber to catch up reclaiming memory (CA-39743) + to wait (if necessary), for the Xen memory scrubber to catch up reclaiming memory. + It + 1. 
calls `Xenctrl.physinfo` which returns: + - `hostinfo.free_pages` - the free and already scrubbed pages (available) + - `host.scrub_pages` - the not yet scrubbed pages (not yet available) + 2. repeats this until a timeout as long as `free_pages` is *lower* + than the *required* pages + - unless `scrub_pages` is 0 (no scrubbing left to do) + + Note: `free_pages` is system-wide memory, not memory specific to a NUMA node. + Because this is not NUMA-aware, in case of temporary node-specific memory shortage, + this check is not sufficient to prevent the VM from being spread over all NUMA nodes. + It is planned to resolve this issue by claiming NUMA node memory during NUMA placement. + 2. Call the hypercall to set the timer mode 3. Call the hypercall to set the number of vCPUs 4. Call the `numa_placement` function From 76f1d6501347917ea76cd9afefb275b8766e3734 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Thu, 20 Feb 2025 12:44:08 +0000 Subject: [PATCH 071/117] CA-407033: Call `receive_finalize2` synchronously `Remote.receive_finalize2` is called at the end of SXM to clean things up and compose the base and leaf images together. The compose operation should only be called while the VDI is deactivated. Currently a thread is created to call `receive_finalize2`, which could cause problems where the VM itself gets started while the `receive_finalize2`/`VDI.compose` is still in progress. This is not a safe operation to do. The fix here is to simply remove the thread and make the whole operation sequential. 
Signed-off-by: Vincent Liu --- ocaml/xapi/storage_migrate.ml | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 6f153515ed7..37ec703709a 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -1338,22 +1338,14 @@ let post_deactivate_hook ~sr ~vdi ~dp:_ = Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" remote_url end)) in - let t = - Thread.create - (fun () -> - debug "Calling receive_finalize2" ; - log_and_ignore_exn (fun () -> - Remote.DATA.MIRROR.receive_finalize2 "Mirror-cleanup" id - ) ; - debug "Finished calling receive_finalize2" ; - State.remove_local_mirror id ; - debug "Removed active local mirror: %s" id - ) - () - in - Option.iter (fun id -> Scheduler.cancel scheduler id) r.watchdog ; - debug "Created thread %d to call receive finalize and dp destroy" - (Thread.id t) + debug "Calling receive_finalize2" ; + log_and_ignore_exn (fun () -> + Remote.DATA.MIRROR.receive_finalize2 "Mirror-cleanup" id + ) ; + debug "Finished calling receive_finalize2" ; + State.remove_local_mirror id ; + debug "Removed active local mirror: %s" id ; + Option.iter (fun id -> Scheduler.cancel scheduler id) r.watchdog ) let nbd_handler req s ?(vm = "0") sr vdi dp = From 8583c5ed6c625279ffd4436ceee9a4502d3a5f89 Mon Sep 17 00:00:00 2001 From: Colin James Date: Fri, 21 Feb 2025 14:25:06 +0000 Subject: [PATCH 072/117] Add internal links to XenAPI reference Adds a simple parser for Xapi type expressions that is used to rewrite the types shown in the XenAPI class reference to include links to relevant documentation. 
Signed-off-by: Colin James --- doc/assets/css/xenapi.css | 4 + doc/assets/js/parse.js | 146 ++++++++++++++++++++++++++++++ doc/layouts/partials/content.html | 69 +++++++++++++- 3 files changed, 214 insertions(+), 5 deletions(-) create mode 100644 doc/assets/js/parse.js diff --git a/doc/assets/css/xenapi.css b/doc/assets/css/xenapi.css index 10caf35ee63..4ab6ff3ea16 100644 --- a/doc/assets/css/xenapi.css +++ b/doc/assets/css/xenapi.css @@ -123,3 +123,7 @@ th { text-align: left; margin: 0; vertical-align: middle; } + +div[id$='_details'] { + cursor: default; +} diff --git a/doc/assets/js/parse.js b/doc/assets/js/parse.js new file mode 100644 index 00000000000..9460aab1bf7 --- /dev/null +++ b/doc/assets/js/parse.js @@ -0,0 +1,146 @@ + +class Type {}; + +class Builtin extends Type { + constructor(name) { + super(); + this.name = name; + } + + static ofString(s) { + const concrete = ['string', 'bool', 'int', 'float', 'void', 'datetime']; + if (!concrete.includes(s)) + return null; + + return new Builtin(s); + } +}; + +class Enum extends Type { + constructor(name) { + super(); + this.name = name; + } +}; + +class Ctor extends Type { + constructor(params, name) { + super(); + this.params = params; + this.name = name; + } +}; + +function lex(str) { + if (str.indexOf('$') >= 0) + throw new Error('Not allowed to contain $'); + + let ts = str.replaceAll('(', ' ( '); + ts = ts.replaceAll(')', ' ) '); + ts = ts.split(' '); + ts = ts.filter(x => x !== ''); + ts.push('$'); + return ts; +} + +class Lexer { + constructor(tokens) { + this.tokens = tokens; + this.pos = 0; + } + + shift() { + if (this.pos >= this.tokens.length - 1) + return '$'; + + return this.tokens[this.pos++]; + } + + peek() { + const prev = this.pos; + let t = this.shift(); + this.pos = prev; + return t; + } + + expect(ts) { + if (!Array.isArray(ts)) + ts = [ts]; + + let l = this.shift(); + for (const t of ts) + if (l == t) return; + + throw new Error(`Expected ${t}, got ${l}`); + } +}; + +function lbp(t) { + 
switch (t) { + case '(': + case ')': + case '->': + case '\u2192': + return 0; + case '$': + return -1; + } + + return 1; +} + +function nud(l, t) { + switch (t) { + case 'enum': + return new Enum(l.shift()); + + case '(': + let left = parseType(l, 0); + l.expect(['->', '\u2192']); + let right = parseType(l, 0); + l.expect(')'); + l.expect('map'); + return new Ctor([left, right], 'map'); + } + + let bty = Builtin.ofString(t); + if (bty != null) + return bty; + + const fmt = /^[a-zA-Z_]+$/; + if (fmt.test(t)) + return new Ctor([], t); + + throw new Error(`No null denotation for ${t}`); +} + +function led(l, left, t) { + const known = ['set', 'ref', 'option', 'record']; + if (!known.includes(t)) + throw new Error(`Invalid type constructor: ${t}`); + + return new Ctor([left], t); +} + +function parseType(l, rbp) { + let left = nud(l, l.shift()); + + while (lbp(l.peek()) > rbp) + left = led(l, left, l.shift()); + + return left; +} + +function parseSingleType(input) { + try { + let lexer = new Lexer(lex(input)); + let ty = parseType(lexer, 0); + if (lexer.peek() != '$') + throw new Error('Did not consume entire input'); + return ty; + } catch (e) { + } + + return null; +} + diff --git a/doc/layouts/partials/content.html b/doc/layouts/partials/content.html index 3700bf47032..007446b478c 100644 --- a/doc/layouts/partials/content.html +++ b/doc/layouts/partials/content.html @@ -8,6 +8,39 @@ {{ $c := .Page.Params.class }} {{ with index (where $.Site.Data.xenapi "name" $c) 0 }} + + {{ $style := resources.Get "css/xenapi.css" }} +{{ $parser := resources.Get "js/parse.js" }} + {{ with .lifecycle }}
@@ -64,11 +114,11 @@

Enums

{{ range $i, $x := .enums }}
-
{{ $x.name }}
+
{{ $x.name }}
ChangeElementDescription
@@ -146,16 +196,20 @@

{{ end }} {{ end }} -
+
{{replace (index $x.result 0) "->" "→"}} {{$x.name}} {{ $ptypes := slice }} {{ range $x.params }} {{ $ptypes = $ptypes | append (replace .type "->" "→") }} {{ end }} - ({{ delimit $ptypes ", " }}) + {{ $wrappedTypes := slice }} + {{ range $ptypes }} + {{ $wrappedTypes = $wrappedTypes | append (safeHTML (printf "%s" .)) }} + {{ end }} + ({{ delimit $wrappedTypes ", " | safeHTML }})
-