From bb672a9a3c75ead7c1236415b38cd2aeb29988b4 Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Thu, 30 Jan 2025 07:56:21 +0000 Subject: [PATCH 1/3] CP-48676: Reuse pool sessions on slave logins. Prevent this reusable pool session from being destroyed so that it remains valid. This feature can be toggled with the flag reuse-pool-sessions. This commit has been reworked to use Atomic instead of a mutex to prevent a deadlock as shown in CA-400339. Signed-off-by: Steven Woods --- ocaml/xapi/xapi_globs.ml | 8 ++++ ocaml/xapi/xapi_session.ml | 80 +++++++++++++++++++++++++++++++------- 2 files changed, 75 insertions(+), 13 deletions(-) diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index a015535dd85..c5087df41ac 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1075,6 +1075,9 @@ let pool_recommendations_dir = ref "/etc/xapi.pool-recommendations.d" let disable_webserver = ref false +let reuse_pool_sessions = ref true +(* Enables the reuse of pool sessions, speeding up intrapool communication *) + let test_open = ref 0 let tgroups_enabled = ref false @@ -1709,6 +1712,11 @@ let other_options = , (fun () -> !driver_tool) , "Path to drivertool for selecting host driver variants" ) + ; ( "reuse-pool-sessions" + , Arg.Set reuse_pool_sessions + , (fun () -> string_of_bool !reuse_pool_sessions) + , "Enable the reuse of pool sessions" + ) ] (* The options can be set with the variable xapiflags in /etc/sysconfig/xapi. diff --git a/ocaml/xapi/xapi_session.ml b/ocaml/xapi/xapi_session.ml index cdbbf638e86..0d7d2d3a40d 100644 --- a/ocaml/xapi/xapi_session.ml +++ b/ocaml/xapi/xapi_session.ml @@ -398,19 +398,24 @@ let is_subject_suspended ~__context ~cache subject_identifier = debug "Subject identifier %s is suspended" subject_identifier ; (is_suspended, subject_name) +let reusable_pool_session = Atomic.make Ref.null + let destroy_db_session ~__context ~self = Context.with_tracing ~__context __FUNCTION__ @@ fun __context -> - Xapi_event.on_session_deleted self ; - (* unregister from the event system *) - (* This info line is important for tracking, auditability and client accountability purposes on XenServer *) - (* Never print the session id nor uuid: they are secret values that should be known only to the user that *) - (* logged in. Instead, we print a non-invertible hash as the tracking id for the session id *) - (* see also task creation in context.ml *) - (* CP-982: create tracking id in log files to link username to actions *) - info "Session.destroy %s" (trackid self) ; - Rbac_audit.session_destroy ~__context ~session_id:self ; - (try Db.Session.destroy ~__context ~self with _ -> ()) ; - Rbac.destroy_session_permissions_tbl ~session_id:self + if self <> Atomic.get reusable_pool_session then ( + Xapi_event.on_session_deleted self ; + (* unregister from the event system *) + (* This info line is important for tracking, auditability and client accountability purposes on XenServer *) + (* Never print the session id nor uuid: they are secret values that should be known only to the user that *) + (* logged in. Instead, we print a non-invertible hash as the tracking id for the session id *) + (* see also task creation in context.ml *) + (* CP-982: create tracking id in log files to link username to actions *) + info "Session.destroy %s" (trackid self) ; + Rbac_audit.session_destroy ~__context ~session_id:self ; + (try Db.Session.destroy ~__context ~self with _ -> ()) ; + Rbac.destroy_session_permissions_tbl ~session_id:self + ) else + info "Skipping Session.destroy for reusable pool session %s" (trackid self) (* CP-703: ensure that activate sessions are invalidated in a bounded time *) (* in response to external authentication/directory services updates, such as *) @@ -610,8 +615,8 @@ let revalidate_all_sessions ~__context = debug "Unexpected exception while revalidating external sessions: %s" (ExnHelper.string_of_exn e) -let login_no_password_common ~__context ~uname ~originator ~host ~pool - ~is_local_superuser ~subject ~auth_user_sid ~auth_user_name +let login_no_password_common_create_session ~__context ~uname ~originator ~host + ~pool ~is_local_superuser ~subject ~auth_user_sid ~auth_user_name ~rbac_permissions ~db_ref ~client_certificate = Context.with_tracing ~originator ~__context __FUNCTION__ @@ fun __context -> let create_session () = @@ -661,6 +666,55 @@ let login_no_password_common ~__context ~uname ~originator ~host ~pool ignore (Client.Pool.get_all ~rpc ~session_id) ; session_id +let login_no_password_common ~__context ~uname ~originator ~host ~pool + ~is_local_superuser ~subject ~auth_user_sid ~auth_user_name + ~rbac_permissions ~db_ref ~client_certificate = + Context.with_tracing ~originator ~__context __FUNCTION__ @@ fun __context -> + let is_valid_session session_id = + if session_id = Ref.null then + false + else + try + (* Call an API function to check the session is still valid *) + let rpc = Helpers.make_rpc ~__context in + ignore (Client.Pool.get_all ~rpc ~session_id) ; + true + with Api_errors.Server_error (err, _) -> + debug "%s: Invalid session: %s" __FUNCTION__ err ; + false + in + let create_session () = + login_no_password_common_create_session ~__context ~uname ~originator ~host + ~pool ~is_local_superuser ~subject ~auth_user_sid ~auth_user_name + ~rbac_permissions ~db_ref ~client_certificate + in + let rec get_session () = + let session = Atomic.get reusable_pool_session in + if is_valid_session session then ( + (* Check if the session changed during validation. + Use our version regardless to avoid being stuck in a loop of session creation *) + if Atomic.get reusable_pool_session <> session then + debug "reusable_pool_session has changed, using the original anyway" ; + session + ) else + let new_session = create_session () in + if Atomic.compare_and_set reusable_pool_session session new_session then + new_session + else ( + (* someone else raced with us and created a session, destroy ours and attempt to use theirs *) + destroy_db_session ~__context ~self:new_session ; + (get_session [@tailcall]) () + ) + in + if + (originator, pool, is_local_superuser, uname) + = (xapi_internal_originator, true, true, None) + && !Xapi_globs.reuse_pool_sessions + then + get_session () + else + create_session () + (* XXX: only used internally by the code which grants the guest access to the API. Needs to be protected by a proper access control system *) let login_no_password ~__context ~uname ~host ~pool ~is_local_superuser ~subject From 9aa9e7e6ace7db66992c997aef5a1afb827e4a67 Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Mon, 23 Sep 2024 11:30:08 +0100 Subject: [PATCH 2/3] CP-48676: Don't check resuable pool session validity by default Add a new flag validate-reusable-pool-session to xapi globs which skips the reusable pool session validity check if it is false. This saves time as we are no longer calling pool.get_all for each session. Signed-off-by: Steven Woods --- ocaml/xapi/xapi_globs.ml | 8 ++++++++ ocaml/xapi/xapi_session.ml | 10 +++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index c5087df41ac..e298640b4f4 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1078,6 +1078,9 @@ let disable_webserver = ref false let reuse_pool_sessions = ref true (* Enables the reuse of pool sessions, speeding up intrapool communication *) +let validate_reusable_pool_session = ref false +(* Validate a reusable session before each use. This is slower and should not be required *) + let test_open = ref 0 let tgroups_enabled = ref false @@ -1717,6 +1720,11 @@ let other_options = , (fun () -> string_of_bool !reuse_pool_sessions) , "Enable the reuse of pool sessions" ) + ; ( "validate-reusable-pool-session" + , Arg.Set validate_reusable_pool_session + , (fun () -> string_of_bool !validate_reusable_pool_session) + , "Enable validation of reusable pool sessions before use" + ) ] (* The options can be set with the variable xapiflags in /etc/sysconfig/xapi. diff --git a/ocaml/xapi/xapi_session.ml b/ocaml/xapi/xapi_session.ml index 0d7d2d3a40d..85554b6c6eb 100644 --- a/ocaml/xapi/xapi_session.ml +++ b/ocaml/xapi/xapi_session.ml @@ -671,9 +671,12 @@ let login_no_password_common ~__context ~uname ~originator ~host ~pool ~rbac_permissions ~db_ref ~client_certificate = Context.with_tracing ~originator ~__context __FUNCTION__ @@ fun __context -> let is_valid_session session_id = - if session_id = Ref.null then - false - else + match (session_id, !Xapi_globs.validate_reusable_pool_session) with + | session, _ when session = Ref.null -> + false + | _, false -> + true + | session_id, true -> ( try (* Call an API function to check the session is still valid *) let rpc = Helpers.make_rpc ~__context in @@ -682,6 +685,7 @@ let login_no_password_common ~__context ~uname ~originator ~host ~pool with Api_errors.Server_error (err, _) -> debug "%s: Invalid session: %s" __FUNCTION__ err ; false + ) in let create_session () = login_no_password_common_create_session ~__context ~uname ~originator ~host From e42cca1936c23371da30cc835d59407aab65f951 Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Thu, 13 Mar 2025 11:11:17 +0000 Subject: [PATCH 3/3] CP-48676: Disable pool session reuse by default This can be reverted once we have sufficient confidence from running nightlies Signed-off-by: Steven Woods --- ocaml/xapi/xapi_globs.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index e298640b4f4..4760b3d3743 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1075,7 +1075,7 @@ let pool_recommendations_dir = ref "/etc/xapi.pool-recommendations.d" let disable_webserver = ref false -let reuse_pool_sessions = ref true +let reuse_pool_sessions = ref false (* Enables the reuse of pool sessions, speeding up intrapool communication *) let validate_reusable_pool_session = ref false