From f0afe95dbb490d5cb9b89c6760a85601bf3a8796 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Tue, 18 Jun 2024 13:53:15 +0100 Subject: [PATCH 1/8] CA-394169: Allow task to have permissions on itself Backport of f25cee63e Do not check permissions of a task if it is acting on itself. In the case the session is lost, the task needs to be able to update itself. If the session were to be lost, we would not able to retrieve back the correct `auth_user_id`. If the task ids match implies that we know we were already authorized. Signed-off-by: Gabriel Buica Co-authored-by: Christian Lindig --- ocaml/xapi/taskHelper.ml | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/ocaml/xapi/taskHelper.ml b/ocaml/xapi/taskHelper.ml index 76f5185b00e..b23b78d0e71 100644 --- a/ocaml/xapi/taskHelper.ml +++ b/ocaml/xapi/taskHelper.ml @@ -56,6 +56,16 @@ let rbac_assert_permission_fn = ref None (* required to break dep-cycle with rbac.ml *) +let are_auth_user_ids_of_sessions_equal ~__context s1 s2 = + Context.with_tracing ~__context __FUNCTION__ @@ fun __context -> + let s1_auth_user_sid = + Db_actions.DB_Action.Session.get_auth_user_sid ~__context ~self:s1 + in + let s2_auth_user_sid = + Db_actions.DB_Action.Session.get_auth_user_sid ~__context ~self:s2 + in + s1_auth_user_sid = s2_auth_user_sid + let assert_op_valid ?(ok_if_no_session_in_context = false) ~__context task_id = let assert_permission_task_op_any () = match !rbac_assert_permission_fn with @@ -80,19 +90,14 @@ let assert_op_valid ?(ok_if_no_session_in_context = false) ~__context task_id = | Some context_session -> let is_own_task = try - let task_session = - Db_actions.DB_Action.Task.get_session ~__context ~self:task_id - in - let task_auth_user_sid = - Db_actions.DB_Action.Session.get_auth_user_sid ~__context - ~self:task_session - in - let context_auth_user_sid = - Db_actions.DB_Action.Session.get_auth_user_sid ~__context - ~self:context_session - in - (*debug "task_auth_user_sid=%s,context_auth_user_sid=%s" task_auth_user_sid context_auth_user_sid;*) - task_auth_user_sid = context_auth_user_sid + (* If the task id is the same as the context's task id, we don't need + to go theough their respective sessions.*) + match Context.get_task_id __context = task_id with + | true -> + true + | false -> + are_auth_user_ids_of_sessions_equal ~__context context_session + (Db_actions.DB_Action.Task.get_session ~__context ~self:task_id) with e -> debug "assert_op_valid: %s" (ExnHelper.string_of_exn e) ; false From 32cd15fcc0a8989325a4538c8d03a930022bd6f6 Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Tue, 18 Jun 2024 13:53:15 +0100 Subject: [PATCH 2/8] CA-394169: Allow task to have permissions on itself - backport Backport adjustments. Signed-off-by: Gabriel Buica Signed-off-by: Christian Lindig --- ocaml/xapi/taskHelper.ml | 1 - 1 file changed, 1 deletion(-) diff --git a/ocaml/xapi/taskHelper.ml b/ocaml/xapi/taskHelper.ml index b23b78d0e71..4d254a5b66a 100644 --- a/ocaml/xapi/taskHelper.ml +++ b/ocaml/xapi/taskHelper.ml @@ -57,7 +57,6 @@ let rbac_assert_permission_fn = ref None (* required to break dep-cycle with rbac.ml *) let are_auth_user_ids_of_sessions_equal ~__context s1 s2 = - Context.with_tracing ~__context __FUNCTION__ @@ fun __context -> let s1_auth_user_sid = Db_actions.DB_Action.Session.get_auth_user_sid ~__context ~self:s1 in From 3b85b7adc94081ea6147024ef8c87f6cec8bfe11 Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Tue, 18 Jun 2024 09:53:09 +0100 Subject: [PATCH 3/8] CA-394444: Update task cancellation in `message_forwarding.ml` Add ``shutdown` to the list of operation that `hard_shutdown` can cancel. Currently, ``shutdown` is missing from the list of operation that `hard_shutdown` needs to cancel. Signed-off-by: Gabriel Buica Co-authored-by: Christian Lindig --- ocaml/xapi/message_forwarding.ml | 1 + 1 file changed, 1 insertion(+) diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index c4e1feae1b2..bbe3f1932e8 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -1858,6 +1858,7 @@ functor ; `hard_reboot ; `pool_migrate ; `call_plugin + ; `shutdown ; `suspend ] ; (* If VM is actually suspended and we ask to hard_shutdown, we need to From 7e01f0038389e917a9723d8301d44d0996a0825e Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Tue, 18 Jun 2024 11:24:45 +0100 Subject: [PATCH 4/8] CA-394444: Update `vm_operation_table` Adds ``shutdown` to `vm_operation_table`. `VM.shutdown` operations were displayed as `(unknown operation)`. This should display the operation correctly in logs and during `xe task-list`. Signed-off-by: Gabriel Buica Co-authored-by: Christian Lindig --- ocaml/xapi-cli-server/record_util.ml | 1 + 1 file changed, 1 insertion(+) diff --git a/ocaml/xapi-cli-server/record_util.ml b/ocaml/xapi-cli-server/record_util.ml index 4d5f36976d2..ed574bb62a2 100644 --- a/ocaml/xapi-cli-server/record_util.ml +++ b/ocaml/xapi-cli-server/record_util.ml @@ -108,6 +108,7 @@ let vm_operation_table = ; (`changing_NVRAM, "changing_NVRAM") ; (`start, "start") ; (`start_on, "start_on") + ; (`shutdown, "shutdown") ; (`suspend, "suspend") ; (`unpause, "unpause") ; (`update_allowed_operations, "update_allowed_operations") From c01429196127617e1a96550b8638d8a9815a1f7e Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Mon, 8 Jul 2024 14:14:52 +0100 Subject: [PATCH 5/8] CA-395174: Try to unarchive VM's metrics when they aren't running Backport 71c39605b3a2dd6eddeba42a5b482b4fc1b3a4e8 Non-running VMs' metrics are stored in the coordinator. When the coordinator is asked about the metrics try to unarchive them instead of failing while trying to fetch the coordinator's IP address. This needs to force the HTTP method of the query to be POST Also returns a Service Unavailable when the host is marked as Broken. Signed-off-by: Pau Ruiz Safont Signed-off-by: Christian Lindig --- ocaml/xapi/rrdd_proxy.ml | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/ocaml/xapi/rrdd_proxy.ml b/ocaml/xapi/rrdd_proxy.ml index 13a49f11493..cae11bebc51 100644 --- a/ocaml/xapi/rrdd_proxy.ml +++ b/ocaml/xapi/rrdd_proxy.ml @@ -77,17 +77,19 @@ let get_vm_rrd_forwarder (req : Http.Request.t) (s : Unix.file_descr) _ = Http_svr.headers s (Http.http_302_redirect url) in let unarchive () = - let req = {req with uri= Constants.rrd_unarchive_uri} in + let req = {req with m= Post; uri= Constants.rrd_unarchive_uri} in ignore (Xapi_services.hand_over_connection req s !Rrd_interface.forwarded_path ) in + let unavailable () = + Http_svr.headers s (Http.http_503_service_unavailable ()) + in (* List of conditions involved. *) let is_unarchive_request = List.mem_assoc Constants.rrd_unarchive query in - let is_master = Pool_role.is_master () in let is_owner_online owner = Db.is_valid_ref __context owner in let is_xapi_initialising = List.mem_assoc "dbsync" query in (* The logic. *) @@ -99,15 +101,25 @@ let get_vm_rrd_forwarder (req : Http.Request.t) (s : Unix.file_descr) _ = let owner = Db.VM.get_resident_on ~__context ~self:vm_ref in let owner_uuid = Ref.string_of owner in let is_owner_localhost = owner_uuid = localhost_uuid in - if is_owner_localhost then - if is_master then + let owner_is_available = + is_owner_online owner && not is_xapi_initialising + in + match + (Pool_role.get_role (), is_owner_localhost, owner_is_available) + with + | (Master | Slave _), false, true -> + (* VM is running elsewhere *) + read_at_owner owner + | Master, true, _ | Master, false, false -> + (* VM running on node, or not running at all. *) unarchive () - else + | Slave _, true, _ | Slave _, _, false -> + (* Coordinator knows best *) unarchive_at_master () - else if is_owner_online owner && not is_xapi_initialising then - read_at_owner owner - else - unarchive_at_master () + | Broken, _, _ -> + info "%s: host is broken, VM's metrics are not available" + __FUNCTION__ ; + unavailable () ) (* Forward the request for host RRD data to the RRDD HTTP handler. If the host From da92725a3c89a3b41b81b37ddf98a33820665ae5 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Mon, 8 Jul 2024 14:14:52 +0100 Subject: [PATCH 6/8] CA-395174: Try to unarchive VM's metrics when they aren't running Backport 71c39605b3a2dd6eddeba42a5b482b4fc1b3a4e8 Backport adjustments Signed-off-by: Christian Lindig --- ocaml/xapi/rrdd_proxy.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xapi/rrdd_proxy.ml b/ocaml/xapi/rrdd_proxy.ml index cae11bebc51..e680cf15eea 100644 --- a/ocaml/xapi/rrdd_proxy.ml +++ b/ocaml/xapi/rrdd_proxy.ml @@ -118,7 +118,7 @@ let get_vm_rrd_forwarder (req : Http.Request.t) (s : Unix.file_descr) _ = unarchive_at_master () | Broken, _, _ -> info "%s: host is broken, VM's metrics are not available" - __FUNCTION__ ; + "get_vm_rrd_forwarder" ; unavailable () ) From a6b658d649bd8607dcfb688567cc0734082f3bcf Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Mon, 8 Jul 2024 14:29:40 +0100 Subject: [PATCH 7/8] CA-395174: rrdd_proxy: Change *_at to specify the IP address Backport 7fe19554f3dcfa99b4e72015a7c62974a4a19424 Forces users to use an address, instead of being implicit, this avoid the underlying cause for the issue fixed in the previous commit: it allowed a coordinator to call Pool_role.get_master_address, which always fails. Signed-off-by: Pau Ruiz Safont Co-authored-by: Christian Lindig Signed-off-by: Christian Lindig --- ocaml/xapi/rrdd_proxy.ml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ocaml/xapi/rrdd_proxy.ml b/ocaml/xapi/rrdd_proxy.ml index e680cf15eea..2d15b80cd23 100644 --- a/ocaml/xapi/rrdd_proxy.ml +++ b/ocaml/xapi/rrdd_proxy.ml @@ -70,8 +70,7 @@ let get_vm_rrd_forwarder (req : Http.Request.t) (s : Unix.file_descr) _ = let url = make_url ~address ~req in Http_svr.headers s (Http.http_302_redirect url) in - let unarchive_at_master () = - let address = Pool_role.get_master_address () in + let unarchive_at address = let query = (Constants.rrd_unarchive, "") :: query in let url = make_url_from_query ~address ~uri:req.uri ~query in Http_svr.headers s (Http.http_302_redirect url) @@ -113,9 +112,9 @@ let get_vm_rrd_forwarder (req : Http.Request.t) (s : Unix.file_descr) _ = | Master, true, _ | Master, false, false -> (* VM running on node, or not running at all. *) unarchive () - | Slave _, true, _ | Slave _, _, false -> + | Slave coordinator, true, _ | Slave coordinator, _, false -> (* Coordinator knows best *) - unarchive_at_master () + unarchive_at coordinator | Broken, _, _ -> info "%s: host is broken, VM's metrics are not available" "get_vm_rrd_forwarder" ; From 41474a7d21a6e8affad3bab5c0225d1fa5169b0f Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Mon, 8 Jul 2024 14:54:14 +0100 Subject: [PATCH 8/8] CA-395174: rrdd_proxy: Use Option to encode where VMs might be available at Backport 6bb7702454f291db6815235c9695f41b4d6b1acf This makes the selection of the action obvious, previously the two booleans made it hazy to understand the decision, and was part of the error why the coordinator tried to get the coordinator address from the pool_role file (and failed badly) Signed-off-by: Pau Ruiz Safont Signed-off-by: Christian Lindig --- ocaml/xapi/rrdd_proxy.ml | 53 ++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/ocaml/xapi/rrdd_proxy.ml b/ocaml/xapi/rrdd_proxy.ml index 2d15b80cd23..f419007d841 100644 --- a/ocaml/xapi/rrdd_proxy.ml +++ b/ocaml/xapi/rrdd_proxy.ml @@ -51,6 +51,7 @@ let fail_req_with (s : Unix.file_descr) msg (http_err : unit -> string list) = *) let get_vm_rrd_forwarder (req : Http.Request.t) (s : Unix.file_descr) _ = debug "put_rrd_forwarder: start" ; + let __FUNCTION__ = "get_vm_rrd_forwarder" in let query = req.Http.Request.query in req.Http.Request.close <- true ; let vm_uuid = List.assoc "uuid" query in @@ -65,8 +66,7 @@ let get_vm_rrd_forwarder (req : Http.Request.t) (s : Unix.file_descr) _ = Xapi_http.with_context ~dummy:true "Get VM RRD." req s (fun __context -> let open Http.Request in (* List of possible actions. *) - let read_at_owner owner = - let address = Db.Host.get_address ~__context ~self:owner in + let read_at address = let url = make_url ~address ~req in Http_svr.headers s (Http.http_302_redirect url) in @@ -89,35 +89,40 @@ let get_vm_rrd_forwarder (req : Http.Request.t) (s : Unix.file_descr) _ = let is_unarchive_request = List.mem_assoc Constants.rrd_unarchive query in - let is_owner_online owner = Db.is_valid_ref __context owner in - let is_xapi_initialising = List.mem_assoc "dbsync" query in + let metrics_at () = + let ( let* ) = Option.bind in + let owner_of vm = + let owner = Db.VM.get_resident_on ~__context ~self:vm in + let is_xapi_initialising = List.mem_assoc "dbsync" query in + let is_available = not is_xapi_initialising in + if Db.is_valid_ref __context owner && is_available then + Some owner + else + None + in + let* owner = owner_of (Db.VM.get_by_uuid ~__context ~uuid:vm_uuid) in + let owner_uuid = Db.Host.get_uuid ~__context ~self:owner in + if owner_uuid = Helpers.get_localhost_uuid () then + (* VM is local but metrics aren't available *) + None + else + let address = Db.Host.get_address ~__context ~self:owner in + Some address + in (* The logic. *) if is_unarchive_request then unarchive () else - let localhost_uuid = Helpers.get_localhost_uuid () in - let vm_ref = Db.VM.get_by_uuid ~__context ~uuid:vm_uuid in - let owner = Db.VM.get_resident_on ~__context ~self:vm_ref in - let owner_uuid = Ref.string_of owner in - let is_owner_localhost = owner_uuid = localhost_uuid in - let owner_is_available = - is_owner_online owner && not is_xapi_initialising - in - match - (Pool_role.get_role (), is_owner_localhost, owner_is_available) - with - | (Master | Slave _), false, true -> - (* VM is running elsewhere *) - read_at_owner owner - | Master, true, _ | Master, false, false -> - (* VM running on node, or not running at all. *) + match (Pool_role.get_role (), metrics_at ()) with + | (Master | Slave _), Some owner -> + read_at owner + | Master, None -> unarchive () - | Slave coordinator, true, _ | Slave coordinator, _, false -> - (* Coordinator knows best *) + | Slave coordinator, None -> unarchive_at coordinator - | Broken, _, _ -> + | Broken, _ -> info "%s: host is broken, VM's metrics are not available" - "get_vm_rrd_forwarder" ; + __FUNCTION__ ; unavailable () )