From 601154563fedaac60c68df7aa03b5b13d058a555 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Sun, 21 Apr 2024 20:18:39 +0100 Subject: [PATCH 01/49] CP-49158: [prep] Add Task completion latency benchmark MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No functional change. Signed-off-by: Edwin Török --- ocaml/tests/bench/bench_throttle2.ml | 86 ++++++++++++++++++++++++++++ ocaml/tests/bench/dune | 4 +- ocaml/xapi/taskHelper.mli | 3 + 3 files changed, 91 insertions(+), 2 deletions(-) create mode 100644 ocaml/tests/bench/bench_throttle2.ml diff --git a/ocaml/tests/bench/bench_throttle2.ml b/ocaml/tests/bench/bench_throttle2.ml new file mode 100644 index 00000000000..50582eff4cc --- /dev/null +++ b/ocaml/tests/bench/bench_throttle2.ml @@ -0,0 +1,86 @@ +open Bechamel + +let () = + Suite_init.harness_init () ; + Debug.set_level Syslog.Warning + +let __context, _ = Test_event_common.event_setup_common () + +let allocate_tasks n = + ( __context + , Array.init n @@ fun i -> + let label = Printf.sprintf "task %d" i in + Xapi_task.create ~__context ~label ~description:"test task" + ) + +let free_tasks (__context, tasks) = + let () = + tasks |> Array.iter @@ fun self -> Xapi_task.destroy ~__context ~self + in + () + +let set_pending tasks = + tasks + |> Array.iter @@ fun self -> + Xapi_task.set_status ~__context ~self ~value:`pending + +let run_tasks _n (__context, tasks) = + set_pending tasks ; + let () = + tasks + |> Array.iter @@ fun self -> + Xapi_task.set_status ~__context ~self ~value:`success + in + tasks |> Array.iter @@ fun t -> Helpers.Task.wait_for ~__context ~tasks:[t] + +let run_tasks' _n (__context, tasks) = + set_pending tasks ; + let () = + tasks + |> Array.iter @@ fun self -> + Xapi_task.set_status ~__context ~self ~value:`success + in + Helpers.Task.wait_for ~__context ~tasks:(Array.to_list tasks) + +module D = Debug.Make (struct let name = __MODULE__ end) + +let run_tasks'' n (__context, tasks) = + set_pending tasks ; + let finished = Atomic.make 0 in + let (t : Thread.t) = + Thread.create + (fun () -> + for _ = 1 to 10 do + Thread.yield () + done ; + tasks + |> Array.iter @@ fun self -> + Xapi_task.set_status ~__context ~self ~value:`success ; + Atomic.incr finished + ) + () + in + Helpers.Task.wait_for ~__context ~tasks:(Array.to_list tasks) ; + let f = Atomic.get finished in + assert (f = n || f = n - 1) ; + Thread.join t + +let benchmarks = + Test.make_grouped ~name:"Task latency" + [ + Test.make_indexed_with_resource ~name:"task complete+wait latency" + ~args:[1; 10; 100] Test.multiple ~allocate:allocate_tasks + ~free:free_tasks (fun n -> Staged.stage (run_tasks n) + ) + ; Test.make_indexed_with_resource ~name:"task complete+wait all latency" + ~args:[1; 10; 100] Test.multiple ~allocate:allocate_tasks + ~free:free_tasks (fun n -> Staged.stage (run_tasks' n) + ) + ; Test.make_indexed_with_resource + ~name:"task complete+wait all latency (thread)" ~args:[1; 10; 100] + Test.multiple ~allocate:allocate_tasks ~free:free_tasks (fun n -> + Staged.stage (run_tasks'' n) + ) + ] + +let () = Bechamel_simple_cli.cli benchmarks diff --git a/ocaml/tests/bench/dune b/ocaml/tests/bench/dune index dcd61813e1e..10cffadb857 100644 --- a/ocaml/tests/bench/dune +++ b/ocaml/tests/bench/dune @@ -1,4 +1,4 @@ (executables - (names bench_tracing bench_uuid) - (libraries tracing bechamel bechamel-notty notty.unix tracing_export threads.posix fmt notty uuid) + (names bench_tracing bench_uuid bench_throttle2) + (libraries tracing bechamel bechamel-notty notty.unix tracing_export threads.posix fmt notty uuid xapi_aux tests_common log xapi_internal) ) diff --git a/ocaml/xapi/taskHelper.mli b/ocaml/xapi/taskHelper.mli index dc5d76cf65b..1c4d5381586 100644 --- a/ocaml/xapi/taskHelper.mli +++ b/ocaml/xapi/taskHelper.mli @@ -36,6 +36,9 @@ val set_result : __context:Context.t -> Rpc.t option -> unit val status_is_completed : [> `cancelled | `failure | `success] -> bool +val status_to_string : + [< `pending | `success | `failure | `cancelling | `cancelled] -> string + val complete : __context:Context.t -> Rpc.t option -> unit val set_cancellable : __context:Context.t -> unit From 95dbc42a73f94750a92484259f7bbef7b43ee04e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Sun, 21 Apr 2024 14:29:03 +0100 Subject: [PATCH 02/49] CP-51690: [prep] Xapi_periodic_scheduler: Factor out Delay.wait call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No functional change. Signed-off-by: Edwin Török --- ocaml/xapi/xapi_periodic_scheduler.ml | 31 +++++++++++++++------------ 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/ocaml/xapi/xapi_periodic_scheduler.ml b/ocaml/xapi/xapi_periodic_scheduler.ml index 1edcb938857..25bb7a49f73 100644 --- a/ocaml/xapi/xapi_periodic_scheduler.ml +++ b/ocaml/xapi/xapi_periodic_scheduler.ml @@ -58,6 +58,22 @@ let remove_from_queue name = if index > -1 then Ipq.remove queue index +let wait_next sleep = + try ignore (Delay.wait delay sleep) + with e -> + let detailed_msg = + match e with + | Unix.Unix_error (code, _, _) -> + Unix.error_message code + | _ -> + "unknown error" + in + error + "Could not schedule interruptable delay (%s). Falling back to normal \ + delay. New events may be missed." + detailed_msg ; + Thread.delay sleep + let loop () = debug "Periodic scheduler started" ; try @@ -85,20 +101,7 @@ let loop () = |> Mtime.Span.add (Clock.span 0.001) |> Scheduler.span_to_s in - try ignore (Delay.wait delay sleep) - with e -> - let detailed_msg = - match e with - | Unix.Unix_error (code, _, _) -> - Unix.error_message code - | _ -> - "unknown error" - in - error - "Could not schedule interruptable delay (%s). Falling back to \ - normal delay. New events may be missed." - detailed_msg ; - Thread.delay sleep + wait_next sleep done with _ -> error From 68af6ce342206115eb8b2739d355c7d2248ec845 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Sun, 21 Apr 2024 14:29:38 +0100 Subject: [PATCH 03/49] CP-51690: [bugfix] Xapi_periodic_scheduler: avoid 10s sleep on empty queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wake up the scheduler immediately when there are more tasks. Otherwise timeouts <10s may not work correctly, and it is difficult to test the periodic scheduler if you need to wait 10s for it to start working. If there are no tasks, then it will still sleep efficiently, but as soon as more tasks are added (with [~signal:true], which is the default) it will immediately wake up and calculate the next sleep time. In practice it is probably quite rare for XAPI's queue to be empty (there are usually periodic tasks), but we cannot rely on this. Signed-off-by: Edwin Török --- ocaml/tests/test_event.ml | 32 +++++++++++++++++++++++++++ ocaml/xapi/xapi_periodic_scheduler.ml | 2 +- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/ocaml/tests/test_event.ml b/ocaml/tests/test_event.ml index 9078244462b..d36dba90eff 100644 --- a/ocaml/tests/test_event.ml +++ b/ocaml/tests/test_event.ml @@ -277,6 +277,37 @@ let object_level_event_test _session_id = if !failure then Alcotest.fail "failed to see object-level event change" +let test_short_oneshot () = + (* don't call event_setup_common here, it'll register a dummy event and hide the bug *) + let started = ref false in + let m = Mutex.create () in + let cond = Condition.create () in + let scheduler () = + Mutex.lock m ; + started := true ; + Condition.broadcast cond ; + Mutex.unlock m ; + Xapi_periodic_scheduler.loop () + in + ignore (Thread.create scheduler ()) ; + (* ensure scheduler sees an empty queue , by waiting for it to start *) + Mutex.lock m ; + while not !started do + Condition.wait cond m + done ; + Mutex.unlock m ; + (* run the scheduler, let it realize its queue is empty, + a Thread.yield is not enough due to the use of debug which would yield back almost immediately. + *) + Thread.delay 1. ; + let fired = Atomic.make false in + let fire () = Atomic.set fired true in + let task = "test_oneshot" in + Xapi_periodic_scheduler.add_to_queue task Xapi_periodic_scheduler.OneShot 1. + fire ; + Thread.delay 2. ; + assert (Atomic.get fired) + let test = [ ("test_event_from_timeout", `Slow, test_event_from_timeout) @@ -287,4 +318,5 @@ let test = ; ("test_event_from", `Quick, event_from_test) ; ("test_event_from_parallel", `Slow, event_from_parallel_test) ; ("test_event_object_level_event", `Slow, object_level_event_test) + ; ("test_short_oneshot", `Slow, test_short_oneshot) ] diff --git a/ocaml/xapi/xapi_periodic_scheduler.ml b/ocaml/xapi/xapi_periodic_scheduler.ml index 25bb7a49f73..7463c55c12b 100644 --- a/ocaml/xapi/xapi_periodic_scheduler.ml +++ b/ocaml/xapi/xapi_periodic_scheduler.ml @@ -80,7 +80,7 @@ let loop () = while true do let empty = with_lock lock (fun () -> Ipq.is_empty queue) in if empty then - Thread.delay 10.0 + wait_next 10.0 (* Doesn't happen often - the queue isn't usually empty *) else let next = with_lock lock (fun () -> Ipq.maximum queue) in From a2f34417d3014b229c4be27046051e194635ed36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Sun, 21 Apr 2024 20:54:42 +0100 Subject: [PATCH 04/49] CP-51693: feat(use-xmlrpc): [perf] use JSONRPC instead of XMLRPC for internal communication MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Feature flag: use-xmlrpc Signed-off-by: Edwin Török --- ocaml/tests/common/mock_rpc.ml | 2 +- ocaml/xapi/helpers.ml | 10 ++++++++-- ocaml/xapi/xapi_globs.ml | 7 +++++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/ocaml/tests/common/mock_rpc.ml b/ocaml/tests/common/mock_rpc.ml index 808308afb1c..9edf87897e7 100644 --- a/ocaml/tests/common/mock_rpc.ml +++ b/ocaml/tests/common/mock_rpc.ml @@ -25,7 +25,7 @@ let rpc __context call = Rpc. { success= true - ; contents= contents |> Xmlrpc.to_string |> Xmlrpc.of_string + ; contents= contents |> Jsonrpc.to_string |> Jsonrpc.of_string ; is_notification= false } | "VM.update_allowed_operations", [session_id_rpc; self_rpc] -> diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index 30965068f3f..d0edcb075a6 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -395,7 +395,13 @@ let make_rpc ~__context rpc : Rpc.response = let subtask_of = Ref.string_of (Context.get_task_id __context) in let open Xmlrpc_client in let tracing = Context.set_client_span __context in - let http = xmlrpc ~subtask_of ~version:"1.1" "/" ~tracing in + let dorpc, path = + if !Xapi_globs.use_xmlrpc then + (XMLRPC_protocol.rpc, "/") + else + (JSONRPC_protocol.rpc, "/jsonrpc") + in + let http = xmlrpc ~subtask_of ~version:"1.1" path ~tracing in let transport = if Pool_role.is_master () then Unix Xapi_globs.unix_domain_socket @@ -407,7 +413,7 @@ let make_rpc ~__context rpc : Rpc.response = , !Constants.https_port ) in - XMLRPC_protocol.rpc ~srcstr:"xapi" ~dststr:"xapi" ~transport ~http rpc + dorpc ~srcstr:"xapi" ~dststr:"xapi" ~transport ~http rpc let make_timeboxed_rpc ~__context timeout rpc : Rpc.response = let subtask_of = Ref.string_of (Context.get_task_id __context) in diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index efdcabfbdb6..d59af9e2e49 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1025,6 +1025,8 @@ let max_spans = ref 10000 let max_traces = ref 10000 +let use_xmlrpc = ref true + let compress_tracing_files = ref true let prefer_nbd_attach = ref false @@ -1436,6 +1438,11 @@ let other_options = , (fun () -> string_of_bool !allow_host_sched_gran_modification) , "Allows to modify the host's scheduler granularity" ) + ; ( "use-xmlrpc" + , Arg.Set use_xmlrpc + , (fun () -> string_of_bool !use_xmlrpc) + , "Use XMLRPC (deprecated) for internal communication or JSONRPC" + ) ; ( "extauth_ad_backend" , Arg.Set_string extauth_ad_backend , (fun () -> !extauth_ad_backend) From 71a4a847104becc73d88130d1f4ede52ba49ff6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Sun, 21 Apr 2024 21:02:32 +0100 Subject: [PATCH 05/49] CP-51701: [perf] Xapi_event: do not convert to lowercase if already lowercase MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tasks are lowercase Signed-off-by: Edwin Török --- ocaml/xapi/xapi_event.ml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ocaml/xapi/xapi_event.ml b/ocaml/xapi/xapi_event.ml index 8c7432106ab..9a04233abdd 100644 --- a/ocaml/xapi/xapi_event.ml +++ b/ocaml/xapi/xapi_event.ml @@ -49,6 +49,10 @@ module Token = struct Printf.sprintf "%020Ld,%020Ld" last last_t end +let is_lowercase_char c = Char.equal (Char.lowercase_ascii c) c + +let is_lowercase str = String.for_all is_lowercase_char str + module Subscription = struct type t = Class of string | Object of string * string | All @@ -71,7 +75,7 @@ module Subscription = struct (** [table_matches subs tbl]: true if at least one subscription from [subs] would select some events from [tbl] *) let table_matches subs tbl = - let tbl = String.lowercase_ascii tbl in + let tbl = if is_lowercase tbl then tbl else String.lowercase_ascii tbl in let matches = function | All -> true @@ -84,7 +88,7 @@ module Subscription = struct (** [event_matches subs ev]: true if at least one subscription from [subs] selects for specified class and object *) let object_matches subs ty _ref = - let tbl = String.lowercase_ascii ty in + let tbl = if is_lowercase ty then ty else String.lowercase_ascii ty in let matches = function | All -> true From 774316fd9fd7d8c6a6e771a47bb64f0725826a9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Sun, 21 Apr 2024 20:56:57 +0100 Subject: [PATCH 06/49] CP-51701: [perf] Xapi_event: drop duplicate lowercase_ascii MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Subscription.object_matches already does it Signed-off-by: Edwin Török --- ocaml/xapi/xapi_event.ml | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/ocaml/xapi/xapi_event.ml b/ocaml/xapi/xapi_event.ml index 9a04233abdd..36ae1409435 100644 --- a/ocaml/xapi/xapi_event.ml +++ b/ocaml/xapi/xapi_event.ml @@ -544,11 +544,7 @@ let from_inner __context session subs from from_t deadline = Db_cache_types.Table.fold_over_recent !last_generation (fun objref {Db_cache_types.Stat.created; modified; deleted} _ (creates, mods, deletes, last) -> - if - Subscription.object_matches subs - (String.lowercase_ascii table) - objref - then + if Subscription.object_matches subs table objref then let last = max last (max modified deleted) in (* mtime guaranteed to always be larger than ctime *) ( ( if created > !last_generation then @@ -578,11 +574,7 @@ let from_inner __context session subs from from_t deadline = Db_cache_types.Table.fold_over_deleted !last_generation (fun objref {Db_cache_types.Stat.created; modified; deleted} (creates, mods, deletes, last) -> - if - Subscription.object_matches subs - (String.lowercase_ascii table) - objref - then + if Subscription.object_matches subs table objref then let last = max last (max modified deleted) in (* mtime guaranteed to always be larger than ctime *) if created > !last_generation then From 4b8613463a7bf35fe55155e383f526898d51b6f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Sun, 21 Apr 2024 20:58:18 +0100 Subject: [PATCH 07/49] CP-51701: [perf] Xapi_events: replace List.any+map with List.exists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Edwin Török --- ocaml/xapi/xapi_event.ml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ocaml/xapi/xapi_event.ml b/ocaml/xapi/xapi_event.ml index 36ae1409435..19fb2b0199b 100644 --- a/ocaml/xapi/xapi_event.ml +++ b/ocaml/xapi/xapi_event.ml @@ -71,8 +71,6 @@ module Subscription = struct (Api_errors.event_subscription_parse_failure, [x]) ) - let any = List.fold_left (fun acc x -> acc || x) false - (** [table_matches subs tbl]: true if at least one subscription from [subs] would select some events from [tbl] *) let table_matches subs tbl = let tbl = if is_lowercase tbl then tbl else String.lowercase_ascii tbl in @@ -84,7 +82,7 @@ module Subscription = struct | Object (x, _) -> x = tbl in - any (List.map matches subs) + List.exists matches subs (** [event_matches subs ev]: true if at least one subscription from [subs] selects for specified class and object *) let object_matches subs ty _ref = @@ -97,7 +95,7 @@ module Subscription = struct | Object (x, y) -> x = tbl && y = _ref in - any (List.map matches subs) + List.exists matches subs (** [event_matches subs ev]: true if at least one subscription from [subs] selects for event [ev] *) let event_matches subs ev = object_matches subs ev.ty ev.reference From 5115fa139ae709bb5c808ea15d32df137e163e19 Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Wed, 25 Sep 2024 10:10:10 +0100 Subject: [PATCH 08/49] CP-49064:`Tgroup` library Creates a new library `Tgroup`, that abstracts and manages groups of execution threads in xapi. When xapi is under load, all the threads need to share a single cpu in dom0 because of ocaml runtime single-cpu restrictions. This library is meant to orchestrate the threads in different priority groups. Signed-off-by: Gabriel Buica --- ocaml/libs/tgroup/dune | 3 + ocaml/libs/tgroup/tgroup.ml | 130 +++++++++++++++++++++++++++++++++++ ocaml/libs/tgroup/tgroup.mli | 70 +++++++++++++++++++ 3 files changed, 203 insertions(+) create mode 100644 ocaml/libs/tgroup/dune create mode 100644 ocaml/libs/tgroup/tgroup.ml create mode 100644 ocaml/libs/tgroup/tgroup.mli diff --git a/ocaml/libs/tgroup/dune b/ocaml/libs/tgroup/dune new file mode 100644 index 00000000000..d1cccfbe444 --- /dev/null +++ b/ocaml/libs/tgroup/dune @@ -0,0 +1,3 @@ +(library + (name tgroup) + (libraries xapi-log xapi-stdext-unix)) diff --git a/ocaml/libs/tgroup/tgroup.ml b/ocaml/libs/tgroup/tgroup.ml new file mode 100644 index 00000000000..c59729f69c5 --- /dev/null +++ b/ocaml/libs/tgroup/tgroup.ml @@ -0,0 +1,130 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module D = Debug.Make (struct let name = __MODULE__ end) + +let ( // ) = Filename.concat + +module Group = struct + module Internal = struct + type t + + let name = "internal" + end + + module External = struct + type t + + let name = "external" + end + + module Host = struct + type t + + let name = "host" + end + + module SM = struct + type t + + let name = "SM" + end + + type _ group = + | Internal_Host_SM : (Internal.t * Host.t * SM.t) group + | EXTERNAL : External.t group + + type t = Group : 'a group -> t + + let all = [Group Internal_Host_SM; Group EXTERNAL] + + module Originator = struct + type t = Internal_Host_SM | EXTERNAL + + let of_string = function + | s + when String.equal + (String.lowercase_ascii SM.name) + (String.lowercase_ascii s) -> + Internal_Host_SM + | s + when String.equal + (String.lowercase_ascii External.name) + (String.lowercase_ascii s) -> + EXTERNAL + | _ -> + EXTERNAL + + let to_string = function + | Internal_Host_SM -> + SM.name + | EXTERNAL -> + External.name + end + + module Creator = struct + type t = { + user: string option + ; endpoint: string option + ; originator: Originator.t + } + + let make ?user ?endpoint originator = {originator; user; endpoint} + + let to_string c = + Printf.sprintf "Creator -> user:%s endpoint:%s originator:%s" + (Option.value c.user ~default:"") + (Option.value c.endpoint ~default:"") + (Originator.to_string c.originator) + end + + let of_originator = function + | Originator.Internal_Host_SM -> + Group Internal_Host_SM + | Originator.EXTERNAL -> + Group EXTERNAL + + let get_originator = function + | Group Internal_Host_SM -> + Originator.Internal_Host_SM + | Group EXTERNAL -> + Originator.EXTERNAL + + let of_creator creator = of_originator creator.Creator.originator + + let to_cgroup : type a. a group -> string = function + | Internal_Host_SM -> + Internal.name // Host.name // SM.name + | EXTERNAL -> + External.name +end + +module Cgroup = struct + type t = string + + let cgroup_dir = Atomic.make None + + let dir_of group : t option = + match group with + | Group.Group group -> + Option.map + (fun dir -> dir // Group.to_cgroup group) + (Atomic.get cgroup_dir) + + let init dir = + let () = Atomic.set cgroup_dir (Some dir) in + Group.all + |> List.filter_map dir_of + |> List.iter (fun dir -> Xapi_stdext_unix.Unixext.mkdir_rec dir 0o755) +end diff --git a/ocaml/libs/tgroup/tgroup.mli b/ocaml/libs/tgroup/tgroup.mli new file mode 100644 index 00000000000..ce87efc593c --- /dev/null +++ b/ocaml/libs/tgroup/tgroup.mli @@ -0,0 +1,70 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +(** [Group] module helps with the classification of different xapi execution + threads.*) +module Group : sig + (** Abstract type that represents a group of execution threads in xapi. Each + group corresponds to a Creator, and has a designated level of priority.*) + type t + + (** Generic representation of different xapi threads originators. *) + module Originator : sig + (** Type that represents different originators of xapi threads. *) + type t = Internal_Host_SM | EXTERNAL + + val of_string : string -> t + (** [of_string s] creates an originator from a string [s]. + + e.g create an originator based on a http header. *) + + val to_string : t -> string + (** [to_string o] converts an originator [o] to its string representation.*) + end + + (** Generic representation of different xapi threads creators. *) + module Creator : sig + (** Abstract type that represents different creators of xapi threads.*) + type t + + val make : ?user:string -> ?endpoint:string -> Originator.t -> t + (** [make o] creates a creator type based on a given originator [o].*) + + val to_string : t -> string + (** [to_string c] converts a creator [c] to its string representation.*) + end + + val get_originator : t -> Originator.t + (** [get_originator group] returns the originator that maps to group [group].*) + + val of_creator : Creator.t -> t + (** [of_creator c] returns the corresponding group based on the creator [c].*) +end + +(** [Cgroup] module encapsulates different function for managing the cgroups +corresponding with [Groups].*) +module Cgroup : sig + (** Represents one of the children of the cgroup directory.*) + type t = string + + val dir_of : Group.t -> t option + (** [dir_of group] returns the full path of the cgroup directory corresponding + to the group [group] as [Some dir]. + + Returns [None] if [init dir] has not been called. *) + + val init : string -> unit + (** [init dir] initializes the hierachy of cgroups associated to all [Group.t] + types under the directory [dir].*) +end From ce7de908fd398a96521a8a95c5c4ed88bd183c25 Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Wed, 25 Sep 2024 14:46:55 +0100 Subject: [PATCH 09/49] CP-51493: Add `set_cgroup` `set_cgroup` adds the functionality of adding the current thread in a cgroup based on its creator. Signed-off-by: Gabriel Buica --- ocaml/libs/tgroup/dune | 2 +- ocaml/libs/tgroup/tgroup.ml | 34 ++++++++++++++++++++++++++++++++++ ocaml/libs/tgroup/tgroup.mli | 4 ++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/ocaml/libs/tgroup/dune b/ocaml/libs/tgroup/dune index d1cccfbe444..025a5adc891 100644 --- a/ocaml/libs/tgroup/dune +++ b/ocaml/libs/tgroup/dune @@ -1,3 +1,3 @@ (library (name tgroup) - (libraries xapi-log xapi-stdext-unix)) + (libraries xapi-log xapi-stdext-unix xapi-stdext-pervasives)) diff --git a/ocaml/libs/tgroup/tgroup.ml b/ocaml/libs/tgroup/tgroup.ml index c59729f69c5..b4c87b087b9 100644 --- a/ocaml/libs/tgroup/tgroup.ml +++ b/ocaml/libs/tgroup/tgroup.ml @@ -14,6 +14,8 @@ module D = Debug.Make (struct let name = __MODULE__ end) +open D + let ( // ) = Filename.concat module Group = struct @@ -127,4 +129,36 @@ module Cgroup = struct Group.all |> List.filter_map dir_of |> List.iter (fun dir -> Xapi_stdext_unix.Unixext.mkdir_rec dir 0o755) + + let write_cur_tid_to_cgroup_file filename = + try + let perms = 0o640 in + let mode = [Unix.O_WRONLY; Unix.O_CREAT; Unix.O_TRUNC] in + Xapi_stdext_unix.Unixext.with_file filename mode perms @@ fun fd -> + (* Writing 0 to the task file will automatically transform in writing + the current caller tid to the file. + + Writing 0 to the processes file will automatically write the caller's + pid to file. *) + let buf = "0\n" in + let len = String.length buf in + if Unix.write fd (Bytes.unsafe_of_string buf) 0 len <> len then + warn "writing current tid to %s failed" filename + with exn -> + warn "writing current tid to %s failed with exception: %s" filename + (Printexc.to_string exn) + + let attach_task group = + let tasks_file = dir_of group // "tasks" in + write_cur_tid_to_cgroup_file tasks_file + + let set_cur_cgroup ~originator = + match originator with + | Group.Originator.Internal_Host_SM -> + attach_task (Group Internal_Host_SM) + | Group.Originator.EXTERNAL -> + attach_task (Group EXTERNAL) + + let set_cgroup creator = + set_cur_cgroup ~originator:creator.Group.Creator.originator end diff --git a/ocaml/libs/tgroup/tgroup.mli b/ocaml/libs/tgroup/tgroup.mli index ce87efc593c..dc39735425b 100644 --- a/ocaml/libs/tgroup/tgroup.mli +++ b/ocaml/libs/tgroup/tgroup.mli @@ -67,4 +67,8 @@ module Cgroup : sig val init : string -> unit (** [init dir] initializes the hierachy of cgroups associated to all [Group.t] types under the directory [dir].*) + + val set_cgroup : Group.Creator.t -> unit + (** [set_cgroup c] sets the current xapi thread in a cgroup based on the + creator [c].*) end From 0714ce27235c341919cff512ea8cefb6c9802668 Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Wed, 25 Sep 2024 15:39:17 +0100 Subject: [PATCH 10/49] CP-51488: Set `tgroup` based on request header. Add functionality of setting the tgroup based on a http header named `originator`. Signed-off-by: Gabriel Buica --- ocaml/libs/tgroup/dune | 2 +- ocaml/libs/tgroup/tgroup.ml | 11 +++++++++++ ocaml/libs/tgroup/tgroup.mli | 4 ++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/ocaml/libs/tgroup/dune b/ocaml/libs/tgroup/dune index 025a5adc891..d1cccfbe444 100644 --- a/ocaml/libs/tgroup/dune +++ b/ocaml/libs/tgroup/dune @@ -1,3 +1,3 @@ (library (name tgroup) - (libraries xapi-log xapi-stdext-unix xapi-stdext-pervasives)) + (libraries xapi-log xapi-stdext-unix)) diff --git a/ocaml/libs/tgroup/tgroup.ml b/ocaml/libs/tgroup/tgroup.ml index b4c87b087b9..24d8cd6e389 100644 --- a/ocaml/libs/tgroup/tgroup.ml +++ b/ocaml/libs/tgroup/tgroup.ml @@ -162,3 +162,14 @@ module Cgroup = struct let set_cgroup creator = set_cur_cgroup ~originator:creator.Group.Creator.originator end + +let of_originator originator = + originator |> Group.Creator.make |> Cgroup.set_cgroup + +let of_req_originator originator = + try + originator + |> Option.value ~default:Group.Originator.(to_string EXTERNAL) + |> Group.Originator.of_string + |> of_originator + with _ -> () diff --git a/ocaml/libs/tgroup/tgroup.mli b/ocaml/libs/tgroup/tgroup.mli index dc39735425b..e1d5c7f0b85 100644 --- a/ocaml/libs/tgroup/tgroup.mli +++ b/ocaml/libs/tgroup/tgroup.mli @@ -72,3 +72,7 @@ module Cgroup : sig (** [set_cgroup c] sets the current xapi thread in a cgroup based on the creator [c].*) end + +val of_req_originator : string option -> unit +(** [of_req_originator o] same as [of_originator] but it classifies based on the +http request header.*) From 3d822a71c7a0ed9baa6365773201b8ea88154541 Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Wed, 2 Oct 2024 13:58:08 +0100 Subject: [PATCH 11/49] CP-49064: Init cgroups at xapi startup Signed-off-by: Gabriel Buica --- dune-project | 8 ++++++++ ocaml/libs/tgroup/dune | 1 + ocaml/xapi/dune | 2 ++ ocaml/xapi/xapi.ml | 4 ++++ ocaml/xapi/xapi_globs.ml | 3 +++ tgroup.opam | 28 ++++++++++++++++++++++++++++ xapi.opam | 1 + 7 files changed, 47 insertions(+) create mode 100644 tgroup.opam diff --git a/dune-project b/dune-project index 15ff4a5fbfa..649162d0fc1 100644 --- a/dune-project +++ b/dune-project @@ -37,6 +37,13 @@ ) ) +(package + (name tgroup) + (depends + xapi-log + xapi-stdext-unix) +) + (package (name xml-light2) ) @@ -373,6 +380,7 @@ tar tar-unix uri + tgroup (uuid (= :version)) uutf uuidm diff --git a/ocaml/libs/tgroup/dune b/ocaml/libs/tgroup/dune index d1cccfbe444..40b75ad1bbd 100644 --- a/ocaml/libs/tgroup/dune +++ b/ocaml/libs/tgroup/dune @@ -1,3 +1,4 @@ (library (name tgroup) + (public_name tgroup) (libraries xapi-log xapi-stdext-unix)) diff --git a/ocaml/xapi/dune b/ocaml/xapi/dune index 9f3e5f825fa..048bd4963f9 100644 --- a/ocaml/xapi/dune +++ b/ocaml/xapi/dune @@ -151,6 +151,7 @@ tapctl tar tar-unix + tgroup threads.posix tracing unixpwd @@ -237,6 +238,7 @@ rpclib.json rpclib.xml stunnel + tgroup threads.posix tracing xapi-backtrace diff --git a/ocaml/xapi/xapi.ml b/ocaml/xapi/xapi.ml index ca87e740efb..6c2475c7929 100644 --- a/ocaml/xapi/xapi.ml +++ b/ocaml/xapi/xapi.ml @@ -1058,6 +1058,10 @@ let server_init () = ; ("Initialising random number generator", [], random_setup) ; ("Initialise TLS verification", [], init_tls_verification) ; ("Running startup check", [], startup_check) + ; ( "Initialize cgroups via tgroup" + , [] + , fun () -> Tgroup.Cgroup.init Xapi_globs.xapi_requests_cgroup + ) ; ( "Registering SMAPIv1 plugins" , [Startup.OnlyMaster] , Sm.register ~__context diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index d59af9e2e49..f2912ab1bb6 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1059,6 +1059,9 @@ let disable_webserver = ref false let test_open = ref 0 +let xapi_requests_cgroup = + "/sys/fs/cgroup/cpu/control.slice/xapi.service/request" + let xapi_globs_spec = [ ( "master_connection_reset_timeout" diff --git a/tgroup.opam b/tgroup.opam new file mode 100644 index 00000000000..423b4628877 --- /dev/null +++ b/tgroup.opam @@ -0,0 +1,28 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +maintainer: ["Xapi project maintainers"] +authors: ["xen-api@lists.xen.org"] +license: "LGPL-2.1-only WITH OCaml-LGPL-linking-exception" +homepage: "https://xapi-project.github.io/" +bug-reports: "https://github.com/xapi-project/xen-api/issues" +depends: [ + "dune" {>= "3.15"} + "xapi-log" + "xapi-stdext-unix" + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/xapi-project/xen-api.git" diff --git a/xapi.opam b/xapi.opam index 098d8463442..e9dce9e47f5 100644 --- a/xapi.opam +++ b/xapi.opam @@ -63,6 +63,7 @@ depends: [ "tar" "tar-unix" "uri" + "tgroup" "uuid" {= version} "uutf" "uuidm" From e90b32ca2aa5e3532dc80e6271923046e174138d Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Mon, 28 Oct 2024 11:56:41 +0000 Subject: [PATCH 12/49] CP-50537: Always reset `_extra_headers` when making a connection. Clears the `extra_headers` of `UDSTransport` instance when making a connection. Previously, this was done only when tracing was enabled inside the `with_tracecontext` method to avoid the header duplication when `make_connection` was used multiple times. Currently, there is not other use of `add_extra_headers` or other update to the `_extra_headers`, making it safe to clear it when we make a new connection. (`xmlrpclib.Transport` updates the `_extra_headers` attribute only inside `make_connection` method but we override this method with our own for `UDSTransport`.) Signed-off-by: Gabriel Buica --- python3/examples/XenAPI/XenAPI.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python3/examples/XenAPI/XenAPI.py b/python3/examples/XenAPI/XenAPI.py index e37f8813b6e..08f61749142 100644 --- a/python3/examples/XenAPI/XenAPI.py +++ b/python3/examples/XenAPI/XenAPI.py @@ -106,6 +106,7 @@ def connect(self): class UDSTransport(xmlrpclib.Transport): def add_extra_header(self, key, value): self._extra_headers += [ (key,value) ] + def with_tracecontext(self): if otel: headers = {} @@ -114,10 +115,14 @@ def with_tracecontext(self): # pylint: disable=possibly-used-before-assignment propagators = propagate.get_global_textmap() propagators.inject(headers, ctx) - self._extra_headers = [] + for k, v in headers.items(): self.add_extra_header(k, v) + def make_connection(self, host): + # clear the extra headers when making a new connection. This makes sure + # headers such as "traceparent" do not get duplicated. + self._extra_headers = [] self.with_tracecontext() # compatibility with parent xmlrpclib.Transport HTTP/1.1 support From 4f5dbb5e6d24e31108a1df860bf4aba665ade690 Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Wed, 23 Oct 2024 10:51:45 +0100 Subject: [PATCH 13/49] CP-50537: Propagate originator as a http request header XenAPI.py now passes an additional originator header when making requests to xapi, if the "ORIGINATOR" env var is present. Sm_exec now passes an env var, "ORIGINATOR=SM". To classify the threads correctly, we first need to determine the requests originators. This commit makes it possibly to explicitly state the originators as headers. Signed-off-by: Gabriel Buica --- ocaml/libs/http-lib/http.ml | 10 ++++++++++ ocaml/libs/http-lib/http.mli | 2 ++ ocaml/xapi/sm_exec.ml | 11 +++++++++-- ocaml/xapi/xapi_observer_components.ml | 8 ++++---- ocaml/xapi/xapi_observer_components.mli | 3 ++- python3/examples/XenAPI/XenAPI.py | 7 +++++++ 6 files changed, 34 insertions(+), 7 deletions(-) diff --git a/ocaml/libs/http-lib/http.ml b/ocaml/libs/http-lib/http.ml index a19745576ce..8f352eb9237 100644 --- a/ocaml/libs/http-lib/http.ml +++ b/ocaml/libs/http-lib/http.ml @@ -132,6 +132,8 @@ module Hdr = struct let location = "location" + let originator = "originator" + let traceparent = "traceparent" let hsts = "strict-transport-security" @@ -688,6 +690,14 @@ module Request = struct let frame_header = if x.frame then make_frame_header headers else "" in frame_header ^ headers ^ body + let with_originator_of req f = + Option.iter + (fun req -> + let originator = List.assoc_opt Hdr.originator req.additional_headers in + f originator + ) + req + let traceparent_of req = let open Tracing in let ( let* ) = Option.bind in diff --git a/ocaml/libs/http-lib/http.mli b/ocaml/libs/http-lib/http.mli index 3fbae8e4c6f..21fc00a8ee6 100644 --- a/ocaml/libs/http-lib/http.mli +++ b/ocaml/libs/http-lib/http.mli @@ -129,6 +129,8 @@ module Request : sig val to_wire_string : t -> string (** [to_wire_string t] returns a string which could be sent to a server *) + val with_originator_of : t option -> (string option -> unit) -> unit + val traceparent_of : t -> Tracing.Span.t option val with_tracing : diff --git a/ocaml/xapi/sm_exec.ml b/ocaml/xapi/sm_exec.ml index 28cdd11e07b..c95c3bcb28e 100644 --- a/ocaml/xapi/sm_exec.ml +++ b/ocaml/xapi/sm_exec.ml @@ -38,6 +38,13 @@ let with_dbg ~name ~dbg f = (*********************************************************************************************) (* Random utility functions *) +let env_vars = + Array.concat + [ + Forkhelpers.default_path_env_pair + ; Env_record.to_string_array [Env_record.pair ("ORIGINATOR", "SM")] + ] + type call = { (* All calls are performed by a specific Host with a special Session and device_config *) host_ref: API.ref_host @@ -355,9 +362,9 @@ let exec_xmlrpc ~dbg ?context:_ ?(needs_session = true) (driver : string) let env, exe, args = match Xapi_observer_components.is_smapi_enabled () with | false -> - (None, exe, args) + (Some env_vars, exe, args) | true -> - Xapi_observer_components.env_exe_args_of + Xapi_observer_components.env_exe_args_of ~env_vars ~component:Xapi_observer_components.SMApi ~exe ~args in Forkhelpers.execute_command_get_output ?tracing:di.tracing ?env diff --git a/ocaml/xapi/xapi_observer_components.ml b/ocaml/xapi/xapi_observer_components.ml index d3e0587b143..0b3b884f465 100644 --- a/ocaml/xapi/xapi_observer_components.ml +++ b/ocaml/xapi/xapi_observer_components.ml @@ -101,12 +101,12 @@ let ( // ) = Filename.concat let dir_name_of_component component = Xapi_globs.observer_config_dir // to_string component // "enabled" -let env_exe_args_of ~component ~exe ~args = +let env_exe_args_of ~env_vars ~component ~exe ~args = let dir_name_value = Filename.quote (dir_name_of_component component) in - let env_vars = + let new_env_vars = Array.concat [ - Forkhelpers.default_path_env_pair + env_vars ; Env_record.to_string_array [ Env_record.pair ("OBSERVER_CONFIG_DIR", dir_name_value) @@ -116,4 +116,4 @@ let env_exe_args_of ~component ~exe ~args = in let args = "-m" :: "observer" :: exe :: args in let new_exe = Xapi_globs.python3_path in - (Some env_vars, new_exe, args) + (Some new_env_vars, new_exe, args) diff --git a/ocaml/xapi/xapi_observer_components.mli b/ocaml/xapi/xapi_observer_components.mli index 55bdf7e7f05..9e046dddaf3 100644 --- a/ocaml/xapi/xapi_observer_components.mli +++ b/ocaml/xapi/xapi_observer_components.mli @@ -63,7 +63,8 @@ val dir_name_of_component : t -> string *) val env_exe_args_of : - component:t + env_vars:string array + -> component:t -> exe:string -> args:string list -> string array option * string * string list diff --git a/python3/examples/XenAPI/XenAPI.py b/python3/examples/XenAPI/XenAPI.py index 08f61749142..012dcf40de7 100644 --- a/python3/examples/XenAPI/XenAPI.py +++ b/python3/examples/XenAPI/XenAPI.py @@ -119,11 +119,18 @@ def with_tracecontext(self): for k, v in headers.items(): self.add_extra_header(k, v) + def with_originator(self): + originator_k = "ORIGINATOR" + originator_v = os.getenv(originator_k, None) + if originator_v: + self.add_extra_header(originator_k.lower(), originator_v) + def make_connection(self, host): # clear the extra headers when making a new connection. This makes sure # headers such as "traceparent" do not get duplicated. self._extra_headers = [] self.with_tracecontext() + self.with_originator() # compatibility with parent xmlrpclib.Transport HTTP/1.1 support if self._connection and host == self._connection[0]: From 13cff9f924a2b2f400dd8bce14927467e42022f9 Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Tue, 22 Oct 2024 14:57:44 +0100 Subject: [PATCH 14/49] CP-51489: Classify threads based on http requests. For now, the thread executing `Xapi.server_init` and it's children are classified as External. The only excception are http requests that come through the smapi internally. If those contain the originator header with the value set as "sm", the thread executing the request will be classified as internal. This represents the first phase of classifing xapi threads as internal vs external. Signed-off-by: Gabriel Buica --- dune-project | 1 + http-lib.opam | 1 + ocaml/libs/http-lib/dune | 1 + ocaml/libs/http-lib/http_svr.ml | 2 +- ocaml/libs/tgroup/tgroup.ml | 21 +++++++++++++-------- 5 files changed, 17 insertions(+), 9 deletions(-) diff --git a/dune-project b/dune-project index 649162d0fc1..e69a04e745a 100644 --- a/dune-project +++ b/dune-project @@ -593,6 +593,7 @@ This package provides an Lwt compatible interface to the library.") (safe-resources(= :version)) sha (stunnel (= :version)) + tgroup uri (uuid (= :version)) xapi-backtrace diff --git a/http-lib.opam b/http-lib.opam index df1b7735eb7..ea91e9c942d 100644 --- a/http-lib.opam +++ b/http-lib.opam @@ -22,6 +22,7 @@ depends: [ "safe-resources" {= version} "sha" "stunnel" {= version} + "tgroup" "uri" "uuid" {= version} "xapi-backtrace" diff --git a/ocaml/libs/http-lib/dune b/ocaml/libs/http-lib/dune index 2990fda2453..c74d6a52e32 100644 --- a/ocaml/libs/http-lib/dune +++ b/ocaml/libs/http-lib/dune @@ -44,6 +44,7 @@ http_lib ipaddr polly + tgroup threads.posix tracing uri diff --git a/ocaml/libs/http-lib/http_svr.ml b/ocaml/libs/http-lib/http_svr.ml index 54a8b96ba73..d412b9d025b 100644 --- a/ocaml/libs/http-lib/http_svr.ml +++ b/ocaml/libs/http-lib/http_svr.ml @@ -560,7 +560,7 @@ let handle_connection ~header_read_timeout ~header_total_timeout read_request ?proxy_seen ~read_timeout ~total_timeout ~max_length:max_header_length ss in - + Http.Request.with_originator_of req Tgroup.of_req_originator ; (* 2. now we attempt to process the request *) let finished = Option.fold ~none:true diff --git a/ocaml/libs/tgroup/tgroup.ml b/ocaml/libs/tgroup/tgroup.ml index 24d8cd6e389..557b66bc1c9 100644 --- a/ocaml/libs/tgroup/tgroup.ml +++ b/ocaml/libs/tgroup/tgroup.ml @@ -124,12 +124,6 @@ module Cgroup = struct (fun dir -> dir // Group.to_cgroup group) (Atomic.get cgroup_dir) - let init dir = - let () = Atomic.set cgroup_dir (Some dir) in - Group.all - |> List.filter_map dir_of - |> List.iter (fun dir -> Xapi_stdext_unix.Unixext.mkdir_rec dir 0o755) - let write_cur_tid_to_cgroup_file filename = try let perms = 0o640 in @@ -149,8 +143,12 @@ module Cgroup = struct (Printexc.to_string exn) let attach_task group = - let tasks_file = dir_of group // "tasks" in - write_cur_tid_to_cgroup_file tasks_file + Option.iter + (fun dir -> + let tasks_file = dir // "tasks" in + write_cur_tid_to_cgroup_file tasks_file + ) + (dir_of group) let set_cur_cgroup ~originator = match originator with @@ -161,6 +159,13 @@ module Cgroup = struct let set_cgroup creator = set_cur_cgroup ~originator:creator.Group.Creator.originator + + let init dir = + let () = Atomic.set cgroup_dir (Some dir) in + Group.all + |> List.filter_map dir_of + |> List.iter (fun dir -> Xapi_stdext_unix.Unixext.mkdir_rec dir 0o755) ; + set_cur_cgroup ~originator:Group.Originator.EXTERNAL end let of_originator originator = From efaf3f0c86cb075c9e2e6d546329b3625d0a55bf Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Mon, 4 Nov 2024 16:07:26 +0000 Subject: [PATCH 15/49] CP-50537: Add a guard in `xapi_globs`, `Xapi_globs.tgroups_enabled`. Adds a configurable variable in `xapi_globs`, `tgroups_enabled` that is meant to ask a guard for tgroup classification of the threads. If the guard is `false` all Tgroups functionality should act as a no op. For instance, adding the line: tgroups-enabled = false will result in the thread classification being skipped. Signed-off-by: Gabriel Buica --- ocaml/libs/http-lib/http_svr.ml | 2 ++ ocaml/libs/tgroup/tgroup.ml | 16 ++++++++++------ ocaml/xapi/xapi.ml | 4 +++- ocaml/xapi/xapi_globs.ml | 7 +++++++ 4 files changed, 22 insertions(+), 7 deletions(-) diff --git a/ocaml/libs/http-lib/http_svr.ml b/ocaml/libs/http-lib/http_svr.ml index d412b9d025b..64c9c929177 100644 --- a/ocaml/libs/http-lib/http_svr.ml +++ b/ocaml/libs/http-lib/http_svr.ml @@ -560,7 +560,9 @@ let handle_connection ~header_read_timeout ~header_total_timeout read_request ?proxy_seen ~read_timeout ~total_timeout ~max_length:max_header_length ss in + Http.Request.with_originator_of req Tgroup.of_req_originator ; + (* 2. now we attempt to process the request *) let finished = Option.fold ~none:true diff --git a/ocaml/libs/tgroup/tgroup.ml b/ocaml/libs/tgroup/tgroup.ml index 557b66bc1c9..a0639974670 100644 --- a/ocaml/libs/tgroup/tgroup.ml +++ b/ocaml/libs/tgroup/tgroup.ml @@ -172,9 +172,13 @@ let of_originator originator = originator |> Group.Creator.make |> Cgroup.set_cgroup let of_req_originator originator = - try - originator - |> Option.value ~default:Group.Originator.(to_string EXTERNAL) - |> Group.Originator.of_string - |> of_originator - with _ -> () + Option.iter + (fun _ -> + try + originator + |> Option.value ~default:Group.Originator.(to_string EXTERNAL) + |> Group.Originator.of_string + |> of_originator + with _ -> () + ) + (Atomic.get Cgroup.cgroup_dir) diff --git a/ocaml/xapi/xapi.ml b/ocaml/xapi/xapi.ml index 6c2475c7929..9b87f1de6b5 100644 --- a/ocaml/xapi/xapi.ml +++ b/ocaml/xapi/xapi.ml @@ -1060,7 +1060,9 @@ let server_init () = ; ("Running startup check", [], startup_check) ; ( "Initialize cgroups via tgroup" , [] - , fun () -> Tgroup.Cgroup.init Xapi_globs.xapi_requests_cgroup + , fun () -> + if !Xapi_globs.tgroups_enabled then + Tgroup.Cgroup.init Xapi_globs.xapi_requests_cgroup ) ; ( "Registering SMAPIv1 plugins" , [Startup.OnlyMaster] diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index f2912ab1bb6..ef3c5ce66a9 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1059,6 +1059,8 @@ let disable_webserver = ref false let test_open = ref 0 +let tgroups_enabled = ref false + let xapi_requests_cgroup = "/sys/fs/cgroup/cpu/control.slice/xapi.service/request" @@ -1624,6 +1626,11 @@ let other_options = , (fun () -> string_of_bool !disable_webserver) , "Disable the host webserver" ) + ; ( "tgroups-enabled" + , Arg.Set tgroups_enabled + , (fun () -> string_of_bool !tgroups_enabled) + , "Turn on tgroups classification" + ) ] (* The options can be set with the variable xapiflags in /etc/sysconfig/xapi. From f713c79f0fc4827c4f4ade4ee4c66881351f6548 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Sun, 21 Apr 2024 20:38:19 +0100 Subject: [PATCH 16/49] CP-51692: feat(use-event-next): introduce use-event-next configuration flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is more efficient: we can watch a single task, instead of everything in the DB. Feature-flag: use-event-next No functional change. Signed-off-by: Edwin Török --- ocaml/xapi-consts/constants.ml | 3 +++ ocaml/xapi/xapi_globs.ml | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/ocaml/xapi-consts/constants.ml b/ocaml/xapi-consts/constants.ml index 2c7fc49e179..d3ee0bf8531 100644 --- a/ocaml/xapi-consts/constants.ml +++ b/ocaml/xapi-consts/constants.ml @@ -275,6 +275,9 @@ let owner_key = "owner" (* set in VBD other-config to indicate that clients can delete the attached VDI on VM uninstall if they want.. *) +(* xapi-cli-server doesn't link xapi-globs *) +let use_event_next = ref true + (* the time taken to wait before restarting in a different mode for pool eject/join operations *) let fuse_time = ref 10. diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index efdcabfbdb6..0c061731924 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1395,6 +1395,11 @@ let other_options = , (fun () -> string_of_bool !Db_globs.idempotent_map) , "True if the add_to_ API calls should be idempotent" ) + ; ( "use-event-next" + , Arg.Set Constants.use_event_next + , (fun () -> string_of_bool !Constants.use_event_next) + , "Use deprecated Event.next instead of Event.from" + ) ; ( "nvidia_multi_vgpu_enabled_driver_versions" , Arg.String (fun x -> From 3a36ed99e9a00272f5c39855a98315da46e70f9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Sun, 21 Apr 2024 20:38:19 +0100 Subject: [PATCH 17/49] CP-52625: workaround Rpc.Int32 parsing bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit int32_of_rpc doesn't accept Int32 as input, just Int because none of the current deserializers actually produce an Int32. (Int32 is only used by serializers to emit something different). This is an upstream ocaml-rpc bug that should be fixed, meanwhile convert Rpc.Int32 to Rpc.Int, so that the 'fake_rpc' inside XAPI can use Event.from. Otherwise you get this error: ``` Expected int32, got 'I32(0) ``` Signed-off-by: Edwin Török --- ocaml/xapi-types/event_types.ml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ocaml/xapi-types/event_types.ml b/ocaml/xapi-types/event_types.ml index fcd8840e59f..83c82b0bc8d 100644 --- a/ocaml/xapi-types/event_types.ml +++ b/ocaml/xapi-types/event_types.ml @@ -77,6 +77,24 @@ let rec rpc_of_event_from e = ; ("token", rpc_of_token e.token) ] +(* xmlrpc and jsonrpc would map Int32 to Int, but int32_of_rpc can't actually parse + an Int32 back as an int32... this is a bug in ocaml-rpc that should be fixed. + meanwhile work it around by mapping Rpc.Int32 to Rpc.Int upon receiving the message + (it is only Rpc.Int32 for backward compat with non-XAPI Xmlrpc clients) +*) + +let rec fixup_int32 = function + | Rpc.Dict dict -> + Rpc.Dict (List.map fixup_kv dict) + | Rpc.Int32 i -> + Rpc.Int (Int64.of_int32 i) + | rpc -> + rpc + +and fixup_kv (k, v) = (k, fixup_int32 v) + +let event_from_of_rpc rpc = rpc |> fixup_int32 |> event_from_of_rpc + (** Return result of an events.from call *) open Printf From e40c1ae0938b7c337d740c101bc644c75642f16a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Sun, 21 Apr 2024 20:38:19 +0100 Subject: [PATCH 18/49] CP-51692: feat(use-event-next): cli_util: use Event.from instead of Event.next MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is more efficient: we can watch a single task, instead of everything in the DB. Feature-flag: use-event-next Signed-off-by: Edwin Török --- ocaml/xapi-cli-server/cli_util.ml | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/ocaml/xapi-cli-server/cli_util.ml b/ocaml/xapi-cli-server/cli_util.ml index 48fd9392ef5..75c4f30360f 100644 --- a/ocaml/xapi-cli-server/cli_util.ml +++ b/ocaml/xapi-cli-server/cli_util.ml @@ -42,21 +42,41 @@ exception Cli_failure of string (** call [callback task_record] on every update to the task, until it completes or fails *) let track callback rpc (session_id : API.ref_session) task = - let classes = ["task"] in + let use_event_next = !Constants.use_event_next in + let classes = + if use_event_next then + ["task"] + else + [Printf.sprintf "task/%s" (Ref.string_of task)] + in finally (fun () -> let finished = ref false in while not !finished do - Client.Event.register ~rpc ~session_id ~classes ; + if use_event_next then + Client.Event.register ~rpc ~session_id ~classes ; try (* Need to check once after registering to avoid a race *) finished := Client.Task.get_status ~rpc ~session_id ~self:task <> `pending ; + let token = ref "" in while not !finished do let events = - Event_types.events_of_rpc (Client.Event.next ~rpc ~session_id) + if use_event_next then + let events = + Event_types.events_of_rpc (Client.Event.next ~rpc ~session_id) + in + List.map Event_helper.record_of_event events + else + let event_from = + Event_types.event_from_of_rpc + (Client.Event.from ~rpc ~session_id ~classes ~token:!token + ~timeout:30. + ) + in + token := event_from.token ; + List.map Event_helper.record_of_event event_from.events in - let events = List.map Event_helper.record_of_event events in List.iter (function | Event_helper.Task (t, Some t_rec) when t = task -> From ace50ae4506fa2bbe771c6b5ff0ad4653d329821 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Sun, 21 Apr 2024 20:43:49 +0100 Subject: [PATCH 19/49] CP-51692: feat(use-event-next): xe event-wait: use Event.from instead of Event.next MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Feature flag: use-event-next Signed-off-by: Edwin Török --- ocaml/xapi-cli-server/cli_operations.ml | 195 +++++++++++++----------- 1 file changed, 104 insertions(+), 91 deletions(-) diff --git a/ocaml/xapi-cli-server/cli_operations.ml b/ocaml/xapi-cli-server/cli_operations.ml index 1e8ba0f3b37..4f61e843140 100644 --- a/ocaml/xapi-cli-server/cli_operations.ml +++ b/ocaml/xapi-cli-server/cli_operations.ml @@ -2848,8 +2848,6 @@ exception Finished let event_wait_gen rpc session_id classname record_matches = (* Immediately register *) let classes = [classname] in - Client.Event.register ~rpc ~session_id ~classes ; - debug "Registered for events" ; (* Check to see if the condition is already satisfied - get all objects of whatever class specified... *) let poll () = let current_tbls = @@ -2930,96 +2928,111 @@ let event_wait_gen rpc session_id classname record_matches = in List.exists record_matches all_recs in - finally - (fun () -> - if not (poll ()) then - try - while true do - try - let events = - Event_types.events_of_rpc (Client.Event.next ~rpc ~session_id) - in - let doevent event = - let tbl = - match Event_helper.record_of_event event with - | Event_helper.VM (r, Some x) -> - let record = vm_record rpc session_id r in - record.setrefrec (r, x) ; - record.fields - | Event_helper.VDI (r, Some x) -> - let record = vdi_record rpc session_id r in - record.setrefrec (r, x) ; - record.fields - | Event_helper.SR (r, Some x) -> - let record = sr_record rpc session_id r in - record.setrefrec (r, x) ; - record.fields - | Event_helper.Host (r, Some x) -> - let record = host_record rpc session_id r in - record.setrefrec (r, x) ; - record.fields - | Event_helper.Network (r, Some x) -> - let record = net_record rpc session_id r in - record.setrefrec (r, x) ; - record.fields - | Event_helper.VIF (r, Some x) -> - let record = vif_record rpc session_id r in - record.setrefrec (r, x) ; - record.fields - | Event_helper.PIF (r, Some x) -> - let record = pif_record rpc session_id r in - record.setrefrec (r, x) ; - record.fields - | Event_helper.VBD (r, Some x) -> - let record = vbd_record rpc session_id r in - record.setrefrec (r, x) ; - record.fields - | Event_helper.PBD (r, Some x) -> - let record = pbd_record rpc session_id r in - record.setrefrec (r, x) ; - record.fields - | Event_helper.Pool (r, Some x) -> - let record = pool_record rpc session_id r in - record.setrefrec (r, x) ; - record.fields - | Event_helper.Task (r, Some x) -> - let record = task_record rpc session_id r in - record.setrefrec (r, x) ; - record.fields - | Event_helper.VMSS (r, Some x) -> - let record = vmss_record rpc session_id r in - record.setrefrec (r, x) ; - record.fields - | Event_helper.Secret (r, Some x) -> - let record = secret_record rpc session_id r in - record.setrefrec (r, x) ; - record.fields - | _ -> - failwith - ("Cli listening for class '" - ^ classname - ^ "' not currently implemented" - ) - in - let record = - List.map (fun r -> (r.name, fun () -> safe_get_field r)) tbl - in - if record_matches record then raise Finished + let use_event_next = !Constants.use_event_next in + let run () = + if not (poll ()) then + try + let token = ref "" in + while true do + let events = + if use_event_next then + Event_types.events_of_rpc (Client.Event.next ~rpc ~session_id) + else + let event_from = + Event_types.event_from_of_rpc + (Client.Event.from ~rpc ~session_id ~timeout:30. ~token:!token + ~classes + ) in - List.iter doevent - (List.filter (fun e -> e.Event_types.snapshot <> None) events) - with - | Api_errors.Server_error (code, _) - when code = Api_errors.events_lost - -> - debug "Got EVENTS_LOST; reregistering" ; - Client.Event.unregister ~rpc ~session_id ~classes ; - Client.Event.register ~rpc ~session_id ~classes ; - if poll () then raise Finished - done - with Finished -> () - ) - (fun () -> Client.Event.unregister ~rpc ~session_id ~classes) + token := event_from.token ; + event_from.events + in + let doevent event = + let tbl = + match Event_helper.record_of_event event with + | Event_helper.VM (r, Some x) -> + let record = vm_record rpc session_id r in + record.setrefrec (r, x) ; + record.fields + | Event_helper.VDI (r, Some x) -> + let record = vdi_record rpc session_id r in + record.setrefrec (r, x) ; + record.fields + | Event_helper.SR (r, Some x) -> + let record = sr_record rpc session_id r in + record.setrefrec (r, x) ; + record.fields + | Event_helper.Host (r, Some x) -> + let record = host_record rpc session_id r in + record.setrefrec (r, x) ; + record.fields + | Event_helper.Network (r, Some x) -> + let record = net_record rpc session_id r in + record.setrefrec (r, x) ; + record.fields + | Event_helper.VIF (r, Some x) -> + let record = vif_record rpc session_id r in + record.setrefrec (r, x) ; + record.fields + | Event_helper.PIF (r, Some x) -> + let record = pif_record rpc session_id r in + record.setrefrec (r, x) ; + record.fields + | Event_helper.VBD (r, Some x) -> + let record = vbd_record rpc session_id r in + record.setrefrec (r, x) ; + record.fields + | Event_helper.PBD (r, Some x) -> + let record = pbd_record rpc session_id r in + record.setrefrec (r, x) ; + record.fields + | Event_helper.Pool (r, Some x) -> + let record = pool_record rpc session_id r in + record.setrefrec (r, x) ; + record.fields + | Event_helper.Task (r, Some x) -> + let record = task_record rpc session_id r in + record.setrefrec (r, x) ; + record.fields + | Event_helper.VMSS (r, Some x) -> + let record = vmss_record rpc session_id r in + record.setrefrec (r, x) ; + record.fields + | Event_helper.Secret (r, Some x) -> + let record = secret_record rpc session_id r in + record.setrefrec (r, x) ; + record.fields + | _ -> + failwith + ("Cli listening for class '" + ^ classname + ^ "' not currently implemented" + ) + in + let record = + List.map (fun r -> (r.name, fun () -> safe_get_field r)) tbl + in + if record_matches record then raise_notrace Finished + in + List.iter doevent + (List.filter (fun e -> e.Event_types.snapshot <> None) events) + done + with + | Api_errors.Server_error (code, _) + when code = Api_errors.events_lost && use_event_next -> + debug "Got EVENTS_LOST; reregistering" ; + Client.Event.unregister ~rpc ~session_id ~classes ; + Client.Event.register ~rpc ~session_id ~classes ; + if poll () then raise Finished + | Finished -> + () + in + if use_event_next then ( + Client.Event.register ~rpc ~session_id ~classes ; + debug "Registered for events" ; + finally run (fun () -> Client.Event.unregister ~rpc ~session_id ~classes) + ) else + run () (* We're done. Unregister and finish *) From 4068f9de99a510d4d4acfce1fd187572ddc77e74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 7 Nov 2024 17:58:17 +0000 Subject: [PATCH 20/49] CA-401651: stunnel_cache: run the cache expiry code periodically MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously it'd only run when we added or removed entries, but on an idle system we'd keep a large number of connections open that we don't need, and this then exceeded the connection limits on the coordinator. Signed-off-by: Edwin Török --- ocaml/libs/stunnel/stunnel_cache.ml | 5 +++-- ocaml/libs/stunnel/stunnel_cache.mli | 6 ++++++ ocaml/xapi/xapi_globs.ml | 2 ++ ocaml/xapi/xapi_periodic_scheduler_init.ml | 4 ++++ 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/ocaml/libs/stunnel/stunnel_cache.ml b/ocaml/libs/stunnel/stunnel_cache.ml index d69fbf10091..54ebe9c53df 100644 --- a/ocaml/libs/stunnel/stunnel_cache.ml +++ b/ocaml/libs/stunnel/stunnel_cache.ml @@ -42,9 +42,9 @@ type endpoint = {host: string; port: int} (* Need to limit the absolute number of stunnels as well as the maximum age *) let max_stunnel = 70 -let max_age = 180. *. 60. (* seconds *) +let max_age = ref (180. *. 60.) (* seconds *) -let max_idle = 5. *. 60. (* seconds *) +let max_idle = ref (5. *. 60.) (* seconds *) (* The add function adds the new stunnel before doing gc, so the cache *) (* can briefly contain one more than maximum. *) @@ -104,6 +104,7 @@ let unlocked_gc () = let to_gc = ref [] in (* Find the ones which are too old *) let now = Unix.gettimeofday () in + let max_age = !max_age and max_idle = !max_idle in Tbl.iter !stunnels (fun idx stunnel -> match Hashtbl.find_opt !times idx with | Some time -> diff --git a/ocaml/libs/stunnel/stunnel_cache.mli b/ocaml/libs/stunnel/stunnel_cache.mli index 9a2923dfcbf..6c581c422ff 100644 --- a/ocaml/libs/stunnel/stunnel_cache.mli +++ b/ocaml/libs/stunnel/stunnel_cache.mli @@ -46,3 +46,9 @@ val flush : unit -> unit val gc : unit -> unit (** GCs old stunnels *) + +val max_age : float ref +(** maximum time a connection is kept in the stunnel cache, counted from the time it got initially added to the cache *) + +val max_idle : float ref +(** maximum time a connection is kept in the stunnel cache, counted from the most recent time it got (re)added to the cache. *) diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index ef3c5ce66a9..6a8e7bb21cd 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1145,6 +1145,8 @@ let xapi_globs_spec = ; ("conn_limit_tcp", Int conn_limit_tcp) ; ("conn_limit_unix", Int conn_limit_unix) ; ("conn_limit_clientcert", Int conn_limit_clientcert) + ; ("stunnel_cache_max_age", Float Stunnel_cache.max_age) + ; ("stunnel_cache_max_idle", Float Stunnel_cache.max_idle) ; ("export_interval", Float export_interval) ; ("max_spans", Int max_spans) ; ("max_traces", Int max_traces) diff --git a/ocaml/xapi/xapi_periodic_scheduler_init.ml b/ocaml/xapi/xapi_periodic_scheduler_init.ml index 5b49ebcde50..6fc6d0de299 100644 --- a/ocaml/xapi/xapi_periodic_scheduler_init.ml +++ b/ocaml/xapi/xapi_periodic_scheduler_init.ml @@ -114,6 +114,10 @@ let register ~__context = Xapi_host.alert_if_tls_verification_was_emergency_disabled ~__context ) ) ; + let stunnel_period = !Stunnel_cache.max_idle /. 2. in + Xapi_periodic_scheduler.add_to_queue "Check stunnel cache expiry" + (Xapi_periodic_scheduler.Periodic stunnel_period) stunnel_period + Stunnel_cache.gc ; if master && Db.Pool.get_update_sync_enabled ~__context From f9a523dccf805af91e8b4bc942cb5457b0834f93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 7 Nov 2024 17:47:31 +0000 Subject: [PATCH 21/49] CA-401652: stunnel_cache: set stunnel size limit based on host role MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce separate coordinator_max_stunnel_cache and member_max_stunnel_cache settings, and set these on XAPI startup based on host role. No functional change, the defaults for both match the previous hardcoded value (70). Signed-off-by: Edwin Török --- ocaml/libs/stunnel/stunnel_cache.ml | 9 +++++++-- ocaml/libs/stunnel/stunnel_cache.mli | 5 +++++ ocaml/xapi/xapi.ml | 3 +++ ocaml/xapi/xapi_globs.ml | 6 ++++++ 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/ocaml/libs/stunnel/stunnel_cache.ml b/ocaml/libs/stunnel/stunnel_cache.ml index 54ebe9c53df..be865a216dc 100644 --- a/ocaml/libs/stunnel/stunnel_cache.ml +++ b/ocaml/libs/stunnel/stunnel_cache.ml @@ -40,7 +40,11 @@ let debug = if debug_enabled then debug else ignore_log type endpoint = {host: string; port: int} (* Need to limit the absolute number of stunnels as well as the maximum age *) -let max_stunnel = 70 +let max_stunnel = Atomic.make 70 + +let set_max_stunnel n = + D.info "Setting max_stunnel = %d" n ; + Atomic.set max_stunnel n let max_age = ref (180. *. 60.) (* seconds *) @@ -48,7 +52,7 @@ let max_idle = ref (5. *. 60.) (* seconds *) (* The add function adds the new stunnel before doing gc, so the cache *) (* can briefly contain one more than maximum. *) -let capacity = max_stunnel + 1 +let capacity = Atomic.get max_stunnel + 1 (** An index of endpoints to stunnel IDs *) let index : (endpoint, int list) Hashtbl.t ref = ref (Hashtbl.create capacity) @@ -123,6 +127,7 @@ let unlocked_gc () = debug "%s: found no entry for idx=%d" __FUNCTION__ idx ) ; let num_remaining = List.length all_ids - List.length !to_gc in + let max_stunnel = Atomic.get max_stunnel in if num_remaining > max_stunnel then ( let times' = Hashtbl.fold (fun k v acc -> (k, v) :: acc) !times [] in let times' = diff --git a/ocaml/libs/stunnel/stunnel_cache.mli b/ocaml/libs/stunnel/stunnel_cache.mli index 6c581c422ff..724642d1dc0 100644 --- a/ocaml/libs/stunnel/stunnel_cache.mli +++ b/ocaml/libs/stunnel/stunnel_cache.mli @@ -19,6 +19,11 @@ HTTP 1.1 should be used and the connection should be kept-alive. *) +val set_max_stunnel : int -> unit +(** [set_max_stunnel] set the maximum number of unusued, but cached client stunnel connections. + This should be a low number on pool members, to avoid hitting limits on the coordinator with large pools. + *) + val with_connect : ?use_fork_exec_helper:bool -> ?write_to_log:(string -> unit) diff --git a/ocaml/xapi/xapi.ml b/ocaml/xapi/xapi.ml index 9b87f1de6b5..d1784b50776 100644 --- a/ocaml/xapi/xapi.ml +++ b/ocaml/xapi/xapi.ml @@ -1143,6 +1143,8 @@ let server_init () = ] ; ( match Pool_role.get_role () with | Pool_role.Master -> + Stunnel_cache.set_max_stunnel + !Xapi_globs.coordinator_max_stunnel_cache ; () | Pool_role.Broken -> info "This node is broken; moving straight to emergency mode" ; @@ -1151,6 +1153,7 @@ let server_init () = server_run_in_emergency_mode () | Pool_role.Slave _ -> info "Running in 'Pool Slave' mode" ; + Stunnel_cache.set_max_stunnel !Xapi_globs.member_max_stunnel_cache ; (* Set emergency mode until we actually talk to the master *) Xapi_globs.slave_emergency_mode := true ; (* signal the init script that it should succeed even though we're bust *) diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 6a8e7bb21cd..af1e06fe7e1 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1011,6 +1011,10 @@ let header_total_timeout_tcp = ref 60. let max_header_length_tcp = ref 1024 (* Maximum accepted size of HTTP headers in bytes (on TCP only) *) +let coordinator_max_stunnel_cache = ref 70 + +let member_max_stunnel_cache = ref 70 + let conn_limit_tcp = ref 800 let conn_limit_unix = ref 1024 @@ -1142,6 +1146,8 @@ let xapi_globs_spec = ; ("header_read_timeout_tcp", Float header_read_timeout_tcp) ; ("header_total_timeout_tcp", Float header_total_timeout_tcp) ; ("max_header_length_tcp", Int max_header_length_tcp) + ; ("coordinator_max_stunnel_cache", Int coordinator_max_stunnel_cache) + ; ("member_max_stunnel_cache", Int member_max_stunnel_cache) ; ("conn_limit_tcp", Int conn_limit_tcp) ; ("conn_limit_unix", Int conn_limit_unix) ; ("conn_limit_clientcert", Int conn_limit_clientcert) From 02cec08b985ffcf5aadb5f139cdaec618de597c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 21 Nov 2024 17:52:14 +0000 Subject: [PATCH 22/49] CA-388210: rename vm' to vm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To avoid labeled argument with ' in name Signed-off-by: Edwin Török --- ocaml/xapi-storage-script/main.ml | 36 +++++++++++++++---------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index c69d28847d1..aec4f8ac884 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -1432,9 +1432,9 @@ let bind ~volume_script_dir = |> wrap in S.VDI.introduce vdi_introduce_impl ; - let vdi_attach3_impl dbg _dp sr vdi' vm' _readwrite = + let vdi_attach3_impl dbg _dp sr vdi' vm _readwrite = (let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm' in + let domain = Storage_interface.Vm.string_of vm in vdi_attach_common dbg sr vdi domain >>>= fun response -> let convert_implementation = function | Xapi_storage.Data.XenDisk {params; extra; backend_type} -> @@ -1456,9 +1456,9 @@ let bind ~volume_script_dir = |> wrap in S.VDI.attach3 vdi_attach3_impl ; - let vdi_activate_common dbg sr vdi' vm' readonly = + let vdi_activate_common dbg sr vdi' vm readonly = (let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm' in + let domain = Storage_interface.Vm.string_of vm in Attached_SRs.find sr >>>= fun sr -> (* Discover the URIs using Volume.stat *) stat ~dbg ~sr ~vdi >>>= fun response -> @@ -1483,17 +1483,17 @@ let bind ~volume_script_dir = ) |> wrap in - let vdi_activate3_impl dbg _dp sr vdi' vm' = - vdi_activate_common dbg sr vdi' vm' false + let vdi_activate3_impl dbg _dp sr vdi' vm = + vdi_activate_common dbg sr vdi' vm false in S.VDI.activate3 vdi_activate3_impl ; - let vdi_activate_readonly_impl dbg _dp sr vdi' vm' = - vdi_activate_common dbg sr vdi' vm' true + let vdi_activate_readonly_impl dbg _dp sr vdi' vm = + vdi_activate_common dbg sr vdi' vm true in S.VDI.activate_readonly vdi_activate_readonly_impl ; - let vdi_deactivate_impl dbg _dp sr vdi' vm' = + let vdi_deactivate_impl dbg _dp sr vdi' vm = (let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm' in + let domain = Storage_interface.Vm.string_of vm in Attached_SRs.find sr >>>= fun sr -> (* Discover the URIs using Volume.stat *) stat ~dbg ~sr ~vdi >>>= fun response -> @@ -1514,9 +1514,9 @@ let bind ~volume_script_dir = |> wrap in S.VDI.deactivate vdi_deactivate_impl ; - let vdi_detach_impl dbg _dp sr vdi' vm' = + let vdi_detach_impl dbg _dp sr vdi' vm = (let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm' in + let domain = Storage_interface.Vm.string_of vm in Attached_SRs.find sr >>>= fun sr -> (* Discover the URIs using Volume.stat *) stat ~dbg ~sr ~vdi >>>= fun response -> @@ -1564,9 +1564,9 @@ let bind ~volume_script_dir = |> wrap in S.SR.stat sr_stat_impl ; - let vdi_epoch_begin_impl dbg sr vdi' vm' persistent = + let vdi_epoch_begin_impl dbg sr vdi' vm persistent = (let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm' in + let domain = Storage_interface.Vm.string_of vm in Attached_SRs.find sr >>>= fun sr -> (* Discover the URIs using Volume.stat *) stat ~dbg ~sr ~vdi >>>= fun response -> @@ -1602,9 +1602,9 @@ let bind ~volume_script_dir = |> wrap in S.VDI.epoch_begin vdi_epoch_begin_impl ; - let vdi_epoch_end_impl dbg sr vdi' vm' = + let vdi_epoch_end_impl dbg sr vdi' vm = (let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm' in + let domain = Storage_interface.Vm.string_of vm in Attached_SRs.find sr >>>= fun sr -> (* Discover the URIs using Volume.stat *) stat ~dbg ~sr ~vdi >>>= fun response -> @@ -1627,9 +1627,9 @@ let bind ~volume_script_dir = S.VDI.epoch_end vdi_epoch_end_impl ; let vdi_set_persistent_impl _dbg _sr _vdi _persistent = return () |> wrap in S.VDI.set_persistent vdi_set_persistent_impl ; - let dp_destroy2 dbg _dp sr vdi' vm' _allow_leak = + let dp_destroy2 dbg _dp sr vdi' vm _allow_leak = (let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm' in + let domain = Storage_interface.Vm.string_of vm in Attached_SRs.find sr >>>= fun sr -> (* Discover the URIs using Volume.stat *) stat ~dbg ~sr ~vdi >>>= fun response -> From ed55521a4b7d7552f902a0975c3ead7066e38503 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 7 Aug 2024 17:08:25 +0100 Subject: [PATCH 23/49] CA-388210: drop unused domain parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This parameter was passed through unchanged. However VDI.epoch_begin/epoch_end doesn't actually need it, so drop it. We intend to change how we compute the 'domain' parameter, and VDI.epoch_begin/end wouldn't have sufficient information to compute it anymore. No functional change. Signed-off-by: Edwin Török --- ocaml/xapi-storage-script/main.ml | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index aec4f8ac884..17af7c60e4b 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -744,7 +744,7 @@ let vdi_of_volume x = ; persistent= true } -let choose_datapath ?(persistent = true) domain response = +let choose_datapath ?(persistent = true) response = (* We can only use a URI with a valid scheme, since we use the scheme to name the datapath plugin. *) let possible = @@ -789,7 +789,7 @@ let choose_datapath ?(persistent = true) domain response = | [] -> fail (missing_uri ()) | (script_dir, scheme, u) :: _us -> - return (fork_exec_rpc ~script_dir, scheme, u, domain) + return (fork_exec_rpc ~script_dir, scheme, u) (* Bind the implementations *) let bind ~volume_script_dir = @@ -863,7 +863,7 @@ let bind ~volume_script_dir = stat ~dbg ~sr ~vdi:temporary ) >>>= fun response -> - choose_datapath domain response >>>= fun (rpc, _datapath, uri, domain) -> + choose_datapath response >>>= fun (rpc, _datapath, uri) -> return_data_rpc (fun () -> Datapath_client.attach (rpc ~dbg) dbg uri domain) in let wrap th = Rpc_lwt.T.put th in @@ -1472,7 +1472,7 @@ let bind ~volume_script_dir = stat ~dbg ~sr ~vdi:temporary ) >>>= fun response -> - choose_datapath domain response >>>= fun (rpc, _datapath, uri, domain) -> + choose_datapath response >>>= fun (rpc, _datapath, uri) -> return_data_rpc (fun () -> let rpc = rpc ~dbg in if readonly then @@ -1506,7 +1506,7 @@ let bind ~volume_script_dir = stat ~dbg ~sr ~vdi:temporary ) >>>= fun response -> - choose_datapath domain response >>>= fun (rpc, _datapath, uri, domain) -> + choose_datapath response >>>= fun (rpc, _datapath, uri) -> return_data_rpc (fun () -> Datapath_client.deactivate (rpc ~dbg) dbg uri domain ) @@ -1529,7 +1529,7 @@ let bind ~volume_script_dir = stat ~dbg ~sr ~vdi:temporary ) >>>= fun response -> - choose_datapath domain response >>>= fun (rpc, _datapath, uri, domain) -> + choose_datapath response >>>= fun (rpc, _datapath, uri) -> return_data_rpc (fun () -> Datapath_client.detach (rpc ~dbg) dbg uri domain) ) |> wrap @@ -1564,14 +1564,12 @@ let bind ~volume_script_dir = |> wrap in S.SR.stat sr_stat_impl ; - let vdi_epoch_begin_impl dbg sr vdi' vm persistent = + let vdi_epoch_begin_impl dbg sr vdi' _vm persistent = (let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm in Attached_SRs.find sr >>>= fun sr -> (* Discover the URIs using Volume.stat *) stat ~dbg ~sr ~vdi >>>= fun response -> - choose_datapath ~persistent domain response - >>>= fun (rpc, datapath, uri, _domain) -> + choose_datapath ~persistent response >>>= fun (rpc, datapath, uri) -> (* If non-persistent and the datapath plugin supports NONPERSISTENT then we delegate this to the datapath plugin. Otherwise we will make a temporary clone now and attach/detach etc this file. *) @@ -1602,13 +1600,12 @@ let bind ~volume_script_dir = |> wrap in S.VDI.epoch_begin vdi_epoch_begin_impl ; - let vdi_epoch_end_impl dbg sr vdi' vm = + let vdi_epoch_end_impl dbg sr vdi' _vm = (let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm in Attached_SRs.find sr >>>= fun sr -> (* Discover the URIs using Volume.stat *) stat ~dbg ~sr ~vdi >>>= fun response -> - choose_datapath domain response >>>= fun (rpc, datapath, uri, _domain) -> + choose_datapath response >>>= fun (rpc, datapath, uri) -> if Datapath_plugins.supports_feature datapath _nonpersistent then return_data_rpc (fun () -> Datapath_client.close (rpc ~dbg) dbg uri) else @@ -1642,7 +1639,7 @@ let bind ~volume_script_dir = stat ~dbg ~sr ~vdi:temporary ) >>>= fun response -> - choose_datapath domain response >>>= fun (rpc, _datapath, uri, domain) -> + choose_datapath response >>>= fun (rpc, _datapath, uri) -> return_data_rpc (fun () -> Datapath_client.deactivate (rpc ~dbg) dbg uri domain ) From bee9e05f1855e4c8e36af857bf293c3a541cd3cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 7 Aug 2024 17:11:32 +0100 Subject: [PATCH 24/49] CA-388210: factor out computing the domain parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are going to change how we compute it, so factor it out into a function. No functional change. Signed-off-by: Edwin Török --- ocaml/xapi-storage-script/main.ml | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index 17af7c60e4b..7413687efb7 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -66,6 +66,8 @@ let backend_backtrace_error name args backtrace = let missing_uri () = backend_error "MISSING_URI" ["Please include a URI in the device-config"] +let domain_of ~dp:_ ~vm = Storage_interface.Vm.string_of vm + (** Functions to wrap calls to the above client modules and convert their exceptions and errors into SMAPIv2 errors of type [Storage_interface.Exception.exnty]. The above client modules should only @@ -1432,9 +1434,9 @@ let bind ~volume_script_dir = |> wrap in S.VDI.introduce vdi_introduce_impl ; - let vdi_attach3_impl dbg _dp sr vdi' vm _readwrite = + let vdi_attach3_impl dbg dp sr vdi' vm _readwrite = (let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm in + let domain = domain_of ~dp ~vm in vdi_attach_common dbg sr vdi domain >>>= fun response -> let convert_implementation = function | Xapi_storage.Data.XenDisk {params; extra; backend_type} -> @@ -1456,9 +1458,9 @@ let bind ~volume_script_dir = |> wrap in S.VDI.attach3 vdi_attach3_impl ; - let vdi_activate_common dbg sr vdi' vm readonly = + let vdi_activate_common dbg dp sr vdi' vm readonly = (let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm in + let domain = domain_of ~dp ~vm in Attached_SRs.find sr >>>= fun sr -> (* Discover the URIs using Volume.stat *) stat ~dbg ~sr ~vdi >>>= fun response -> @@ -1483,17 +1485,17 @@ let bind ~volume_script_dir = ) |> wrap in - let vdi_activate3_impl dbg _dp sr vdi' vm = - vdi_activate_common dbg sr vdi' vm false + let vdi_activate3_impl dbg dp sr vdi' vm = + vdi_activate_common dbg dp sr vdi' vm false in S.VDI.activate3 vdi_activate3_impl ; - let vdi_activate_readonly_impl dbg _dp sr vdi' vm = - vdi_activate_common dbg sr vdi' vm true + let vdi_activate_readonly_impl dbg dp sr vdi' vm = + vdi_activate_common dbg dp sr vdi' vm true in S.VDI.activate_readonly vdi_activate_readonly_impl ; - let vdi_deactivate_impl dbg _dp sr vdi' vm = + let vdi_deactivate_impl dbg dp sr vdi' vm = (let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm in + let domain = domain_of ~dp ~vm in Attached_SRs.find sr >>>= fun sr -> (* Discover the URIs using Volume.stat *) stat ~dbg ~sr ~vdi >>>= fun response -> @@ -1514,9 +1516,9 @@ let bind ~volume_script_dir = |> wrap in S.VDI.deactivate vdi_deactivate_impl ; - let vdi_detach_impl dbg _dp sr vdi' vm = + let vdi_detach_impl dbg dp sr vdi' vm = (let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm in + let domain = domain_of ~dp ~vm in Attached_SRs.find sr >>>= fun sr -> (* Discover the URIs using Volume.stat *) stat ~dbg ~sr ~vdi >>>= fun response -> @@ -1624,9 +1626,9 @@ let bind ~volume_script_dir = S.VDI.epoch_end vdi_epoch_end_impl ; let vdi_set_persistent_impl _dbg _sr _vdi _persistent = return () |> wrap in S.VDI.set_persistent vdi_set_persistent_impl ; - let dp_destroy2 dbg _dp sr vdi' vm _allow_leak = + let dp_destroy2 dbg dp sr vdi' vm _allow_leak = (let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm in + let domain = domain_of ~dp ~vm in Attached_SRs.find sr >>>= fun sr -> (* Discover the URIs using Volume.stat *) stat ~dbg ~sr ~vdi >>>= fun response -> From 77b8ae9866eb84bae3234f57b77bd11762f0d7ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 7 Aug 2024 17:12:33 +0100 Subject: [PATCH 25/49] CA-388210: SMAPIv3 concurrency safety: send the (unique) datapath argument as domain for Dom0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dom0 allows multiple attaches of RO disks, used e.g. for VDI.copy. Send a unique value to SMAPIv3 plugins to avoid bugs to the lack of reference counting in those plugins. XAPI already sends a unique value here, either vbd/domid/device, or a fresh UUID (for storage migration). Signed-off-by: Edwin Török --- ocaml/xapi-storage-script/main.ml | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index 7413687efb7..cdf0b647fe4 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -66,7 +66,22 @@ let backend_backtrace_error name args backtrace = let missing_uri () = backend_error "MISSING_URI" ["Please include a URI in the device-config"] -let domain_of ~dp:_ ~vm = Storage_interface.Vm.string_of vm +(** return a unique 'domain' string for Dom0, so that we can plug disks + multiple times (e.g. for copy). + + XAPI should give us a unique 'dp' (datapath) string, e.g. a UUID for storage migration, + or vbd/domid/device. + For regular guests keep the domain as passed by XAPI (an integer). + *) +let domain_of ~dp ~vm = + let vm = Storage_interface.Vm.string_of vm in + match vm with + | "0" -> + (* SM tries to use this in filesystem paths, so cannot have /, + and systemd might be a bit unhappy with - *) + "u0-" ^ dp |> String.map (function '/' | '-' -> '_' | c -> c) + | _ -> + vm (** Functions to wrap calls to the above client modules and convert their exceptions and errors into SMAPIv2 errors of type From 2686c6f0c86b13749ddc1a2314371564afd78ac1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 7 Aug 2024 14:24:58 +0100 Subject: [PATCH 26/49] CA-388210: SMAPIv3 debugging: log PID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Log PID on successful and failed operations, and log full cmdline for newly spawned processes. This can be used to debug stuck scripts, so that we know which invocation is the one that is stuck. Signed-off-by: Edwin Török --- ocaml/xapi-storage-script/lib.ml | 4 +++- ocaml/xapi-storage-script/lib.mli | 3 ++- ocaml/xapi-storage-script/main.ml | 26 ++++++++++++++++---------- ocaml/xapi-storage-script/test_lib.ml | 25 +++++++++++++++++++++---- 4 files changed, 42 insertions(+), 16 deletions(-) diff --git a/ocaml/xapi-storage-script/lib.ml b/ocaml/xapi-storage-script/lib.ml index 9c9059432bf..a3beb9a8009 100644 --- a/ocaml/xapi-storage-script/lib.ml +++ b/ocaml/xapi-storage-script/lib.ml @@ -131,6 +131,7 @@ module Process = struct type t = { exit_status: (unit, exit_or_signal) Result.t + ; pid: int ; stdout: string ; stderr: string } @@ -176,6 +177,7 @@ module Process = struct let run ~env ~prog ~args ~input = let ( let@ ) f x = f x in let@ p = with_process ~env ~prog ~args in + let pid = p#pid in let sender = send p#stdin input in let receiver_out = receive p#stdout in let receiver_err = receive p#stderr in @@ -185,7 +187,7 @@ module Process = struct Lwt.both sender receiver >>= fun ((), (stdout, stderr)) -> p#status >>= fun status -> let exit_status = Output.exit_or_signal_of_unix status in - Lwt.return {Output.exit_status; stdout; stderr} + Lwt.return {Output.exit_status; pid; stdout; stderr} ) (function | Lwt.Canceled as exn -> diff --git a/ocaml/xapi-storage-script/lib.mli b/ocaml/xapi-storage-script/lib.mli index a55c4b81fbc..eae9183a174 100644 --- a/ocaml/xapi-storage-script/lib.mli +++ b/ocaml/xapi-storage-script/lib.mli @@ -65,6 +65,7 @@ module Process : sig type t = { exit_status: (unit, exit_or_signal) result + ; pid: int ; stdout: string ; stderr: string } @@ -78,7 +79,7 @@ module Process : sig -> Output.t Lwt.t (** Runs a cli program, writes [input] into its stdin, then closing the fd, and finally waits for the program to finish and returns the exit status, - its stdout and stderr. *) + the pid, and its stdout and stderr. *) end module DirWatcher : sig diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index cdf0b647fe4..7255845e5ea 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -477,6 +477,8 @@ let fork_exec_rpc : ) >>>= fun input -> let input = compat_in input |> Jsonrpc.to_string in + debug (fun m -> m "Running %s" @@ Filename.quote_command script_name args) + >>= fun () -> Process.run ~env ~prog:script_name ~args ~input >>= fun output -> let fail_because ~cause description = fail @@ -500,12 +502,13 @@ let fork_exec_rpc : with | Error _ -> error (fun m -> - m "%s failed and printed bad error json: %s" script_name - output.Process.Output.stdout + m "%s[%d] failed and printed bad error json: %s" script_name + output.pid output.Process.Output.stdout ) >>= fun () -> error (fun m -> - m "%s failed, stderr: %s" script_name output.Process.Output.stderr + m "%s[%d] failed, stderr: %s" script_name output.pid + output.Process.Output.stderr ) >>= fun () -> fail_because "non-zero exit and bad json on stdout" @@ -516,12 +519,12 @@ let fork_exec_rpc : with | Error _ -> error (fun m -> - m "%s failed and printed bad error json: %s" script_name - output.Process.Output.stdout + m "%s[%d] failed and printed bad error json: %s" script_name + output.pid output.Process.Output.stdout ) >>= fun () -> error (fun m -> - m "%s failed, stderr: %s" script_name + m "%s[%d] failed, stderr: %s" script_name output.pid output.Process.Output.stderr ) >>= fun () -> @@ -532,7 +535,9 @@ let fork_exec_rpc : ) ) | Error (Signal signal) -> - error (fun m -> m "%s caught a signal and failed" script_name) + error (fun m -> + m "%s[%d] caught a signal and failed" script_name output.pid + ) >>= fun () -> fail_because "signalled" ~cause:(Signal.to_string signal) | Ok () -> ( (* Parse the json on stdout. We get back a JSON-RPC @@ -544,8 +549,8 @@ let fork_exec_rpc : with | Error _ -> error (fun m -> - m "%s succeeded but printed bad json: %s" script_name - output.Process.Output.stdout + m "%s[%d] succeeded but printed bad json: %s" script_name + output.pid output.Process.Output.stdout ) >>= fun () -> fail @@ -554,7 +559,8 @@ let fork_exec_rpc : ) | Ok response -> info (fun m -> - m "%s succeeded: %s" script_name output.Process.Output.stdout + m "%s[%d] succeeded: %s" script_name output.pid + output.Process.Output.stdout ) >>= fun () -> let response = compat_out response in diff --git a/ocaml/xapi-storage-script/test_lib.ml b/ocaml/xapi-storage-script/test_lib.ml index e016d1368a4..ca1d0a07a1c 100644 --- a/ocaml/xapi-storage-script/test_lib.ml +++ b/ocaml/xapi-storage-script/test_lib.ml @@ -103,12 +103,20 @@ let test_run_status = let module P = Process in let test () = let* output = P.run ~prog:"true" ~args:[] ~input:"" ~env:[] in - let expected = P.Output.{exit_status= Ok (); stdout= ""; stderr= ""} in + let expected = + P.Output.{exit_status= Ok (); pid= output.pid; stdout= ""; stderr= ""} + in Alcotest.(check output_c) "Exit status is correct" expected output ; let* output = P.run ~prog:"false" ~args:[] ~input:"" ~env:[] in let expected = - P.Output.{exit_status= Error (Exit_non_zero 1); stdout= ""; stderr= ""} + P.Output. + { + exit_status= Error (Exit_non_zero 1) + ; pid= output.pid + ; stdout= "" + ; stderr= "" + } in Alcotest.(check output_c) "Exit status is correct" expected output ; @@ -121,7 +129,10 @@ let test_run_output = let test () = let content = "@@@@@@" in let* output = P.run ~prog:"cat" ~args:["-"] ~input:content ~env:[] in - let expected = P.Output.{exit_status= Ok (); stdout= content; stderr= ""} in + let expected = + P.Output. + {exit_status= Ok (); pid= output.pid; stdout= content; stderr= ""} + in Alcotest.(check output_c) "Stdout is correct" expected output ; let* output = P.run ~prog:"cat" ~args:[content] ~input:content ~env:[] in @@ -129,7 +140,13 @@ let test_run_output = Printf.sprintf "cat: %s: No such file or directory\n" content in let expected = - P.Output.{exit_status= Error (Exit_non_zero 1); stdout= ""; stderr} + P.Output. + { + exit_status= Error (Exit_non_zero 1) + ; pid= output.pid + ; stdout= "" + ; stderr + } in Alcotest.(check output_c) "Stderr is correct" expected output ; Lwt.return () From b93ce07bb3fe285930d6b72c35f6e38475e6af06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 21 Nov 2024 18:15:00 +0000 Subject: [PATCH 27/49] CP-52707: Improve Event.from/next API documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Describe EVENTS_LOST in Event.next. Remove EVENTS_LOST and SESSION_NOT_REGISTERED from Event.from: these are only raised by Event.next. Document the actual errors that Event.from can raise. Signed-off-by: Edwin Török --- ocaml/idl/datamodel.ml | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/ocaml/idl/datamodel.ml b/ocaml/idl/datamodel.ml index 83d5d1740c3..40974fbcc9d 100644 --- a/ocaml/idl/datamodel.ml +++ b/ocaml/idl/datamodel.ml @@ -8517,11 +8517,18 @@ module Event = struct ] ~doc: "Blocking call which returns a (possibly empty) batch of events. This \ - method is only recommended for legacy use. New development should use \ - event.from which supersedes this method." + method is only recommended for legacy use.It stores events in a \ + buffer of limited size, raising EVENTS_LOST if too many events got \ + generated. New development should use event.from which supersedes \ + this method." ~custom_marshaller:true ~flags:[`Session] ~result:(Set (Record _event), "A set of events") - ~errs:[Api_errors.session_not_registered; Api_errors.events_lost] + ~errs: + [ + Api_errors.session_not_registered + ; Api_errors.events_lost + ; Api_errors.event_subscription_parse_failure + ] ~allowed_roles:_R_ALL () let from = @@ -8551,7 +8558,8 @@ module Event = struct ~doc: "Blocking call which returns a new token and a (possibly empty) batch \ of events. The returned token can be used in subsequent calls to this \ - function." + function. It eliminates redundant events (e.g. same field updated \ + multiple times)." ~custom_marshaller:true ~flags:[`Session] ~result: ( Set (Record _event) @@ -8562,7 +8570,11 @@ module Event = struct (*In reality the event batch is not a set of records as stated here. Due to the difficulty of representing this in the datamodel, the doc is generated manually, so ensure the markdown_backend.ml and gen_json.ml is updated if something changes. *) - ~errs:[Api_errors.session_not_registered; Api_errors.events_lost] + ~errs: + [ + Api_errors.event_from_token_parse_failure + ; Api_errors.event_subscription_parse_failure + ] ~allowed_roles:_R_ALL () let get_current_id = From f1c3cee5a387e7a89a28244a5b0dc3208fc08053 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 7 Aug 2024 14:24:58 +0100 Subject: [PATCH 28/49] CA-388210: SMAPIv3 concurrency: turn on concurrent operations by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is believed that the deadlocks in SMAPIv3 were caused by VDI.copy operations that attach the same disk RO multiple times in Dom0. With the previous commits we now use a unique identifier and spawn a separate qemu-dp process in the SMAPIv3 plugins, which should prevent the deadlocks and IO errors due to lack of refcounting. Signed-off-by: Edwin Török --- ocaml/xapi-storage-script/main.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index 7255845e5ea..8785c99d0a9 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -1793,7 +1793,7 @@ let rec diff a b = (* default false due to bugs in SMAPIv3 plugins, once they are fixed this should be set to true *) -let concurrent = ref false +let concurrent = ref true type reload = All | Files of string list | Nothing From 3e36355c29ce7c104ef73ac283850559bf0aad36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Tue, 3 Dec 2024 11:08:06 +0000 Subject: [PATCH 29/49] CA-388210: delete comment about deadlock bug, they are fixed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Edwin Török --- ocaml/xapi-storage-script/main.ml | 2 -- 1 file changed, 2 deletions(-) diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index 8785c99d0a9..cba7ec89d56 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -1791,8 +1791,6 @@ let rec diff a b = | a :: aa -> if List.mem a b then diff aa b else a :: diff aa b -(* default false due to bugs in SMAPIv3 plugins, - once they are fixed this should be set to true *) let concurrent = ref true type reload = All | Files of string list | Nothing From d2804d6eabf4b4a1d778c5d7ef977e45cf44dd6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Tue, 3 Dec 2024 18:14:45 +0000 Subject: [PATCH 30/49] CA-388564: move qemu-dm to vm.slice MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Qemu moves itself to the root cgroup, which makes it a sibling of control.slice. All toolstack processes are in control.slice, and even though control.slice gets 10x share of CPU, if we have 1000 qemu processes there is still a big imbalance (toolstack gets 10/1010 share of the CPU) Qemu-wrapper already has the ability to move the process to another cgroup, so use it. tapdisk and qemu-datapath are already in vm.slice, so move qemu-dm there too. Signed-off-by: Edwin Török --- ocaml/xenopsd/scripts/qemu-wrapper | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xenopsd/scripts/qemu-wrapper b/ocaml/xenopsd/scripts/qemu-wrapper index 93f5c685eac..03312651075 100644 --- a/ocaml/xenopsd/scripts/qemu-wrapper +++ b/ocaml/xenopsd/scripts/qemu-wrapper @@ -47,7 +47,7 @@ xenstore = xs.xs() # - 'system.slice' means move it into the system slice, etc. # If the nominated slice does not already exist, the process will be # left in its parent's slice. -cgroup_slice = '' +cgroup_slice = 'vm.slice' CLONE_NEWNS = 0x00020000 # mount namespace CLONE_NEWNET = 0x40000000 # network namespace From 6b0247411984161807651d7d05a3992dfc5ffa34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 28 Nov 2024 09:37:50 +0000 Subject: [PATCH 31/49] CP-52821: Xapi_periodic_scheduler: introduce add_to_queue_span MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Edwin Török --- .../xapi-stdext/lib/xapi-stdext-threads/dune | 2 +- .../lib/xapi-stdext-threads/scheduler.ml | 47 ++++++++++--------- .../lib/xapi-stdext-threads/scheduler.mli | 4 ++ 3 files changed, 29 insertions(+), 24 deletions(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune index d8036380cd7..5d61f52cfc4 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune @@ -18,7 +18,7 @@ (public_name xapi-stdext-threads.scheduler) (name xapi_stdext_threads_scheduler) (modules ipq scheduler) - (libraries mtime mtime.clock.os threads.posix unix xapi-log xapi-stdext-threads) + (libraries mtime mtime.clock.os threads.posix unix xapi-log xapi-stdext-threads clock) ) (tests diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml index a544ed79bbb..8332e0897ae 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml @@ -33,31 +33,27 @@ let (queue : t Ipq.t) = Ipq.create 50 queue_default let lock = Mutex.create () -module Clock = struct - let span s = Mtime.Span.of_uint64_ns (Int64.of_float (s *. 1e9)) - - let span_to_s span = - Mtime.Span.to_uint64_ns span |> Int64.to_float |> fun ns -> ns /. 1e9 - - let add_span clock secs = - (* return mix or max available value if the add overflows *) - match Mtime.add_span clock (span secs) with - | Some t -> - t - | None when secs > 0. -> - Mtime.max_stamp - | None -> - Mtime.min_stamp -end +let add_span clock span = + (* return max value if the add overflows: spans are unsigned integers *) + match Mtime.add_span clock span with Some t -> t | None -> Mtime.max_stamp -let add_to_queue name ty start newfunc = - let ( ++ ) = Clock.add_span in +let add_to_queue_span name ty start_span newfunc = + let ( ++ ) = add_span in let item = - {Ipq.ev= {func= newfunc; ty; name}; Ipq.time= Mtime_clock.now () ++ start} + { + Ipq.ev= {func= newfunc; ty; name} + ; Ipq.time= Mtime_clock.now () ++ start_span + } in with_lock lock (fun () -> Ipq.add queue item) ; Delay.signal delay +let add_to_queue name ty start newfunc = + let start_span = + Clock.Timer.s_to_span start |> Option.value ~default:Mtime.Span.max_span + in + add_to_queue_span name ty start_span newfunc + let remove_from_queue name = with_lock lock @@ fun () -> match !pending_event with @@ -72,8 +68,11 @@ let add_periodic_pending () = with_lock lock @@ fun () -> match !pending_event with | Some ({ty= Periodic timer; _} as ev) -> - let ( ++ ) = Clock.add_span in - let item = {Ipq.ev; Ipq.time= Mtime_clock.now () ++ timer} in + let ( ++ ) = add_span in + let delta = + Clock.Timer.s_to_span timer |> Option.value ~default:Mtime.Span.max_span + in + let item = {Ipq.ev; Ipq.time= Mtime_clock.now () ++ delta} in Ipq.add queue item ; pending_event := None | Some {ty= OneShot; _} -> @@ -90,7 +89,7 @@ let loop () = with_lock lock @@ fun () -> (* empty: wait till we get something *) if Ipq.is_empty queue then - (Clock.add_span now 10.0, None) + (add_span now Mtime.Span.(10 * s), None) else let next = Ipq.maximum queue in if Mtime.is_later next.Ipq.time ~than:now then @@ -111,7 +110,9 @@ let loop () = | None -> ( (* Sleep until next event. *) let sleep = - Mtime.(span deadline now) |> Mtime.Span.(add ms) |> Clock.span_to_s + Mtime.(span deadline now) + |> Mtime.Span.(add ms) + |> Clock.Timer.span_to_s in try ignore (Delay.wait delay sleep) with e -> diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.mli index d4d19b1f790..53a7c764dcc 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.mli +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.mli @@ -18,6 +18,10 @@ type func_ty = | OneShot (** Fire just once *) | Periodic of float (** Fire periodically with a given period in seconds *) +val add_to_queue_span : + string -> func_ty -> Mtime.span -> (unit -> unit) -> unit +(** Start a new timer. *) + val add_to_queue : string -> func_ty -> float -> (unit -> unit) -> unit (** Start a new timer. *) From 68cb0b84a4f0680959104062f36128d7d76d5f5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 28 Nov 2024 09:55:33 +0000 Subject: [PATCH 32/49] CP-52821: Xapi_event: use Clock.Timer instead of gettimeofday MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Edwin Török --- ocaml/xapi/xapi_event.ml | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/ocaml/xapi/xapi_event.ml b/ocaml/xapi/xapi_event.ml index af2a610523c..94b7c4bd9b7 100644 --- a/ocaml/xapi/xapi_event.ml +++ b/ocaml/xapi/xapi_event.ml @@ -419,20 +419,25 @@ module From = struct let session_is_invalid call = with_lock call.m (fun () -> call.session_invalid) - let wait2 call from_id deadline = + let wait2 call from_id timer = let timeoutname = Printf.sprintf "event_from_timeout_%Ld" call.index in with_lock m (fun () -> while from_id = call.cur_id && (not (session_is_invalid call)) - && Unix.gettimeofday () < deadline + && not (Clock.Timer.has_expired timer) do - Xapi_stdext_threads_scheduler.Scheduler.add_to_queue timeoutname - Xapi_stdext_threads_scheduler.Scheduler.OneShot - (deadline -. Unix.gettimeofday () +. 0.5) - (fun () -> Condition.broadcast c) ; - Condition.wait c m ; - Xapi_stdext_threads_scheduler.Scheduler.remove_from_queue timeoutname + match Clock.Timer.remaining timer with + | Expired _ -> + () + | Remaining delta -> + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue_span + timeoutname Xapi_stdext_threads_scheduler.Scheduler.OneShot + delta (fun () -> Condition.broadcast c + ) ; + Condition.wait c m ; + Xapi_stdext_threads_scheduler.Scheduler.remove_from_queue + timeoutname done ) ; if session_is_invalid call then ( @@ -506,7 +511,7 @@ let rec next ~__context = else rpc_of_events relevant -let from_inner __context session subs from from_t deadline = +let from_inner __context session subs from from_t timer = let open Xapi_database in let open From in (* The database tables involved in our subscription *) @@ -605,14 +610,14 @@ let from_inner __context session subs from from_t deadline = && mods = [] && deletes = [] && messages = [] - && Unix.gettimeofday () < deadline + && not (Clock.Timer.has_expired timer) then ( last_generation := last ; (* Cur_id was bumped, but nothing relevent fell out of the db. Therefore the *) sub.cur_id <- last ; (* last id the client got is equivalent to the current one *) last_msg_gen := msg_gen ; - wait2 sub last deadline ; + wait2 sub last timer ; Thread.delay 0.05 ; grab_nonempty_range () ) else @@ -705,14 +710,19 @@ let from ~__context ~classes ~token ~timeout = ) in let subs = List.map Subscription.of_string classes in - let deadline = Unix.gettimeofday () +. timeout in + let duration = + timeout + |> Clock.Timer.s_to_span + |> Option.value ~default:Mtime.Span.(24 * hour) + in + let timer = Clock.Timer.start ~duration in (* We need to iterate because it's possible for an empty event set to be generated if we peek in-between a Modify and a Delete; we'll miss the Delete event and fail to generate the Modify because the snapshot can't be taken. *) let rec loop () = - let event_from = from_inner __context session subs from from_t deadline in - if event_from.events = [] && Unix.gettimeofday () < deadline then ( + let event_from = from_inner __context session subs from from_t timer in + if event_from.events = [] && not (Clock.Timer.has_expired timer) then ( debug "suppressing empty event.from" ; loop () ) else From 6b6c6c5fe25d11476a98e3cb307309005b8fd970 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 5 Dec 2024 17:01:25 +0000 Subject: [PATCH 33/49] CP-52821: xapi_periodic_scheduler: use Mtime.span instead of Mtime.t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoids dealing with overflow Signed-off-by: Edwin Török --- .../lib/xapi-stdext-threads/ipq.ml | 14 ++++++------- .../lib/xapi-stdext-threads/ipq.mli | 2 +- .../lib/xapi-stdext-threads/ipq_test.ml | 14 ++++++------- .../lib/xapi-stdext-threads/scheduler.ml | 20 ++++++++----------- 4 files changed, 23 insertions(+), 27 deletions(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml index 4cf29ed3d9b..7293ae625e1 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml @@ -13,7 +13,7 @@ *) (* Imperative priority queue *) -type 'a event = {ev: 'a; time: Mtime.t} +type 'a event = {ev: 'a; time: Mtime.span} type 'a t = {default: 'a event; mutable size: int; mutable data: 'a event array} @@ -23,7 +23,7 @@ let create n default = if n <= 0 then invalid_arg "create" else - let default = {ev= default; time= Mtime_clock.now ()} in + let default = {ev= default; time= Mtime_clock.elapsed ()} in {default; size= 0; data= Array.make n default} let is_empty h = h.size <= 0 @@ -45,7 +45,7 @@ let add h x = (* moving [x] up in the heap *) let rec moveup i = let fi = (i - 1) / 2 in - if i > 0 && Mtime.is_later d.(fi).time ~than:x.time then ( + if i > 0 && Mtime.Span.is_longer d.(fi).time ~than:x.time then ( d.(i) <- d.(fi) ; moveup fi ) else @@ -69,7 +69,7 @@ let remove h s = (* moving [x] up in the heap *) let rec moveup i = let fi = (i - 1) / 2 in - if i > 0 && Mtime.is_later d.(fi).time ~than:x.time then ( + if i > 0 && Mtime.Span.is_longer d.(fi).time ~than:x.time then ( d.(i) <- d.(fi) ; moveup fi ) else @@ -83,7 +83,7 @@ let remove h s = let j' = j + 1 in if j' < n && d.(j').time < d.(j).time then j' else j in - if Mtime.is_earlier d.(j).time ~than:x.time then ( + if Mtime.Span.is_shorter d.(j).time ~than:x.time then ( d.(i) <- d.(j) ; movedown j ) else @@ -93,7 +93,7 @@ let remove h s = in if s = n then () - else if Mtime.is_later d.(s).time ~than:x.time then + else if Mtime.Span.is_longer d.(s).time ~than:x.time then moveup s else movedown s ; @@ -129,7 +129,7 @@ let check h = let d = h.data in for i = 1 to h.size - 1 do let fi = (i - 1) / 2 in - let ordered = Mtime.is_later d.(i).time ~than:d.(fi).time in + let ordered = Mtime.Span.is_longer d.(i).time ~than:d.(fi).time in assert ordered done diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli index b7c4974e642..19f8bf1e33f 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli @@ -12,7 +12,7 @@ * GNU Lesser General Public License for more details. *) -type 'a event = {ev: 'a; time: Mtime.t} +type 'a event = {ev: 'a; time: Mtime.span} type 'a t diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml index e8e64093e16..a9cc2611da8 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml @@ -17,7 +17,7 @@ module Ipq = Xapi_stdext_threads_scheduler.Ipq (* test we get "out of bound" exception calling Ipq.remove *) let test_out_of_index () = let q = Ipq.create 10 0 in - Ipq.add q {Ipq.ev= 123; Ipq.time= Mtime_clock.now ()} ; + Ipq.add q {Ipq.ev= 123; Ipq.time= Mtime_clock.elapsed ()} ; let is_oob = function | Invalid_argument s when String.ends_with ~suffix:" out of bounds" s -> true @@ -43,18 +43,18 @@ let test_leak () = let use_array () = array.(0) <- 'a' in let allocated = Atomic.make true in Gc.finalise (fun _ -> Atomic.set allocated false) array ; - Ipq.add q {Ipq.ev= use_array; Ipq.time= Mtime_clock.now ()} ; + Ipq.add q {Ipq.ev= use_array; Ipq.time= Mtime_clock.elapsed ()} ; Ipq.remove q 0 ; Gc.full_major () ; Gc.full_major () ; Alcotest.(check bool) "allocated" false (Atomic.get allocated) ; - Ipq.add q {Ipq.ev= default; Ipq.time= Mtime_clock.now ()} + Ipq.add q {Ipq.ev= default; Ipq.time= Mtime_clock.elapsed ()} (* test Ipq.is_empty call *) let test_empty () = let q = Ipq.create 10 0 in Alcotest.(check bool) "same value" true (Ipq.is_empty q) ; - Ipq.add q {Ipq.ev= 123; Ipq.time= Mtime_clock.now ()} ; + Ipq.add q {Ipq.ev= 123; Ipq.time= Mtime_clock.elapsed ()} ; Alcotest.(check bool) "same value" false (Ipq.is_empty q) ; Ipq.remove q 0 ; Alcotest.(check bool) "same value" true (Ipq.is_empty q) @@ -75,7 +75,7 @@ let set queue = Ipq.iter (fun d -> let t = d.time in - let t = Mtime.to_uint64_ns t in + let t = Mtime.Span.to_uint64_ns t in s := Int64Set.add t !s ) queue ; @@ -86,7 +86,7 @@ let test_old () = let s = ref Int64Set.empty in let add i = let ti = Random.int64 1000000L in - let t = Mtime.of_uint64_ns ti in + let t = Mtime.Span.of_uint64_ns ti in let e = {Ipq.time= t; Ipq.ev= i} in Ipq.add test e ; s := Int64Set.add ti !s @@ -123,7 +123,7 @@ let test_old () = let prev = ref 0L in for _ = 0 to 49 do let e = Ipq.pop_maximum test in - let t = Mtime.to_uint64_ns e.time in + let t = Mtime.Span.to_uint64_ns e.time in Alcotest.(check bool) (Printf.sprintf "%Ld bigger than %Ld" t !prev) true (t >= !prev) ; diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml index 8332e0897ae..27cf3069955 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml @@ -33,16 +33,12 @@ let (queue : t Ipq.t) = Ipq.create 50 queue_default let lock = Mutex.create () -let add_span clock span = - (* return max value if the add overflows: spans are unsigned integers *) - match Mtime.add_span clock span with Some t -> t | None -> Mtime.max_stamp - let add_to_queue_span name ty start_span newfunc = - let ( ++ ) = add_span in + let ( ++ ) = Mtime.Span.add in let item = { Ipq.ev= {func= newfunc; ty; name} - ; Ipq.time= Mtime_clock.now () ++ start_span + ; Ipq.time= Mtime_clock.elapsed () ++ start_span } in with_lock lock (fun () -> Ipq.add queue item) ; @@ -68,11 +64,11 @@ let add_periodic_pending () = with_lock lock @@ fun () -> match !pending_event with | Some ({ty= Periodic timer; _} as ev) -> - let ( ++ ) = add_span in + let ( ++ ) = Mtime.Span.add in let delta = Clock.Timer.s_to_span timer |> Option.value ~default:Mtime.Span.max_span in - let item = {Ipq.ev; Ipq.time= Mtime_clock.now () ++ delta} in + let item = {Ipq.ev; Ipq.time= Mtime_clock.elapsed () ++ delta} in Ipq.add queue item ; pending_event := None | Some {ty= OneShot; _} -> @@ -84,15 +80,15 @@ let loop () = debug "%s started" __MODULE__ ; try while true do - let now = Mtime_clock.now () in + let now = Mtime_clock.elapsed () in let deadline, item = with_lock lock @@ fun () -> (* empty: wait till we get something *) if Ipq.is_empty queue then - (add_span now Mtime.Span.(10 * s), None) + (Mtime.Span.add now Mtime.Span.(10 * s), None) else let next = Ipq.maximum queue in - if Mtime.is_later next.Ipq.time ~than:now then + if Mtime.Span.is_longer next.Ipq.time ~than:now then (* not expired: wait till time or interrupted *) (next.Ipq.time, None) else ( @@ -110,7 +106,7 @@ let loop () = | None -> ( (* Sleep until next event. *) let sleep = - Mtime.(span deadline now) + Mtime.(Span.abs_diff deadline now) |> Mtime.Span.(add ms) |> Clock.Timer.span_to_s in From 0e909ec335b40655a44b52454ba742b4800cdeac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 18 Apr 2024 21:43:55 +0100 Subject: [PATCH 34/49] CP-49158: [prep] batching: add a helper for recursive, batched calls like Event.{from,next} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No functional change Signed-off-by: Edwin Török --- ocaml/xapi-aux/dune | 1 + ocaml/xapi-aux/throttle.ml | 24 ++++++++++++++++++++++++ ocaml/xapi-aux/throttle.mli | 30 ++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+) diff --git a/ocaml/xapi-aux/dune b/ocaml/xapi-aux/dune index 86fbd8647c9..d334769d655 100644 --- a/ocaml/xapi-aux/dune +++ b/ocaml/xapi-aux/dune @@ -3,6 +3,7 @@ (modes best) (libraries astring + clock cstruct forkexec ipaddr diff --git a/ocaml/xapi-aux/throttle.ml b/ocaml/xapi-aux/throttle.ml index a9dacf7f164..79761c1b762 100644 --- a/ocaml/xapi-aux/throttle.ml +++ b/ocaml/xapi-aux/throttle.ml @@ -39,3 +39,27 @@ module Make (Size : SIZE) = struct let execute f = execute (get_semaphore ()) f end + +module Batching = struct + type t = {delay_before: Mtime.span; delay_between: Mtime.span} + + let make ~delay_before ~delay_between = {delay_before; delay_between} + + (** [perform_delay delay] calls {!val:Thread.delay} when [delay] is non-zero. + + Thread.delay 0 provides no fairness guarantees, the current thread may actually be the one that gets the global lock again. + Instead {!val:Thread.yield} could be used, which does provide fairness guarantees, but it may also introduce large latencies + when there are lots of threads waiting for the OCaml runtime lock. + *) + let perform_delay delay = + if Mtime.Span.is_longer delay ~than:Mtime.Span.zero then + Thread.delay (Clock.Timer.span_to_s delay) + + let with_recursive_loop config f arg = + let rec self arg = + perform_delay config.delay_between ; + (f [@tailcall]) self arg + in + perform_delay config.delay_before ; + f self arg +end diff --git a/ocaml/xapi-aux/throttle.mli b/ocaml/xapi-aux/throttle.mli index 897fe5ed6ce..fb4212b565b 100644 --- a/ocaml/xapi-aux/throttle.mli +++ b/ocaml/xapi-aux/throttle.mli @@ -22,3 +22,33 @@ module Make (_ : SIZE) : sig val execute : (unit -> 'a) -> 'a end + +module Batching : sig + (** batching delay configuration *) + type t + + val make : delay_before:Mtime.Span.t -> delay_between:Mtime.Span.t -> t + (** [make ~delay_before ~delay_between] creates a configuration, + where we delay the API call by [delay_before] once, + and then with [delay_between] between each recursive call. + *) + + val with_recursive_loop : t -> (('a -> 'b) -> 'a -> 'b) -> 'a -> 'b + (** [with_recursive_loop config f arg] calls [f self arg], where [self] can be used + for recursive calls. + + A [delay_before] amount of seconds is inserted once, and [delay_between] is inserted between recursive calls: + {v + delay_before + f ... + (self[@tailcall]) ... + delay_between + f ... + (self[@tailcall]) ... + delay_between + f ... + v} + + The delays are determined by [config] + *) +end From efaa6064b926d35195c91eb2bd3428f40f88ca04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 18 Apr 2024 21:47:38 +0100 Subject: [PATCH 35/49] CP-49158: [prep] Event.from: replace recursion with Batching.with_recursive MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No functional change. Signed-off-by: Edwin Török --- ocaml/xapi/xapi_event.ml | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/ocaml/xapi/xapi_event.ml b/ocaml/xapi/xapi_event.ml index 94b7c4bd9b7..388d6130820 100644 --- a/ocaml/xapi/xapi_event.ml +++ b/ocaml/xapi/xapi_event.ml @@ -511,7 +511,7 @@ let rec next ~__context = else rpc_of_events relevant -let from_inner __context session subs from from_t timer = +let from_inner __context session subs from from_t timer batching = let open Xapi_database in let open From in (* The database tables involved in our subscription *) @@ -599,7 +599,8 @@ let from_inner __context session subs from from_t timer = (* Each event.from should have an independent subscription record *) let msg_gen, messages, tableset, (creates, mods, deletes, last) = with_call session subs (fun sub -> - let rec grab_nonempty_range () = + let grab_nonempty_range = + Throttle.Batching.with_recursive_loop batching @@ fun self () -> let ( (msg_gen, messages, _tableset, (creates, mods, deletes, last)) as result ) = @@ -618,8 +619,7 @@ let from_inner __context session subs from from_t timer = (* last id the client got is equivalent to the current one *) last_msg_gen := msg_gen ; wait2 sub last timer ; - Thread.delay 0.05 ; - grab_nonempty_range () + (self [@tailcall]) () ) else result in @@ -698,6 +698,10 @@ let from_inner __context session subs from from_t timer = {events; valid_ref_counts; token= Token.to_string (last, msg_gen)} let from ~__context ~classes ~token ~timeout = + let batching = + Throttle.Batching.make ~delay_before:Mtime.Span.zero + ~delay_between:Mtime.Span.(50 * ms) + in let session = Context.get_session_id __context in let from, from_t = try Token.of_string token @@ -721,7 +725,9 @@ let from ~__context ~classes ~token ~timeout = miss the Delete event and fail to generate the Modify because the snapshot can't be taken. *) let rec loop () = - let event_from = from_inner __context session subs from from_t timer in + let event_from = + from_inner __context session subs from from_t timer batching + in if event_from.events = [] && not (Clock.Timer.has_expired timer) then ( debug "suppressing empty event.from" ; loop () From 3e1d8a27a755597f5237dd74496cfe968da3321c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 18 Apr 2024 21:49:09 +0100 Subject: [PATCH 36/49] CP-51692: Event.next: use same batching as Event.from MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Event.next is deprecated, but was allowed to use a lot more CPU in a tight loop than Event.from. No feature flag for this one, because Event.next is deprecated. Signed-off-by: Edwin Török --- ocaml/xapi/xapi_event.ml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/ocaml/xapi/xapi_event.ml b/ocaml/xapi/xapi_event.ml index 388d6130820..3563d01a5f2 100644 --- a/ocaml/xapi/xapi_event.ml +++ b/ocaml/xapi/xapi_event.ml @@ -470,6 +470,10 @@ let unregister ~__context ~classes = (** Blocking call which returns the next set of events relevant to this session. *) let rec next ~__context = + let batching = + Throttle.Batching.make ~delay_before:Mtime.Span.zero + ~delay_between:Mtime.Span.(50 * ms) + in let session = Context.get_session_id __context in let open Next in assert_subscribed session ; @@ -489,11 +493,12 @@ let rec next ~__context = ) in (* Like grab_range () only guarantees to return a non-empty range by blocking if necessary *) - let rec grab_nonempty_range () = + let grab_nonempty_range = + Throttle.Batching.with_recursive_loop batching @@ fun self () -> let last_id, end_id = grab_range () in if last_id = end_id then let (_ : int64) = wait subscription end_id in - grab_nonempty_range () + (self [@tailcall]) () else (last_id, end_id) in From 2b4e0db649d0ccdc81196853cb2801bf0e9a13fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Sun, 21 Apr 2024 00:14:11 +0100 Subject: [PATCH 37/49] CP-49158: [prep] Event.{from,next}: make delays configurable and prepare for task specific delays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tasks are very small, and we need to react to them more quickly, there is usually nothing to batch. Refactor code and prepare for using different delays for tasks. The delays are now configurable, but their default values are exactly the same as before. Also in the future we should probably use monotonic clocks here, but I've kep t that code unchanged for now. No functional change (except for configurability). Signed-off-by: Edwin Török --- ocaml/xapi/xapi_event.ml | 31 +++++++++++++++++------------ ocaml/xapi/xapi_globs.ml | 43 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 13 deletions(-) diff --git a/ocaml/xapi/xapi_event.ml b/ocaml/xapi/xapi_event.ml index 3563d01a5f2..39d87363df0 100644 --- a/ocaml/xapi/xapi_event.ml +++ b/ocaml/xapi/xapi_event.ml @@ -56,6 +56,12 @@ let is_lowercase str = String.for_all is_lowercase_char str module Subscription = struct type t = Class of string | Object of string * string | All + let is_task_only = function + | Class "task" | Object ("task", _) -> + true + | Class _ | Object _ | All -> + false + let of_string x = if x = "*" then All @@ -470,10 +476,7 @@ let unregister ~__context ~classes = (** Blocking call which returns the next set of events relevant to this session. *) let rec next ~__context = - let batching = - Throttle.Batching.make ~delay_before:Mtime.Span.zero - ~delay_between:Mtime.Span.(50 * ms) - in + let batching = !Xapi_globs.event_next_delay in let session = Context.get_session_id __context in let open Next in assert_subscribed session ; @@ -703,9 +706,18 @@ let from_inner __context session subs from from_t timer batching = {events; valid_ref_counts; token= Token.to_string (last, msg_gen)} let from ~__context ~classes ~token ~timeout = + let duration = + timeout + |> Clock.Timer.s_to_span + |> Option.value ~default:Mtime.Span.(24 * hour) + in + let timer = Clock.Timer.start ~duration in + let subs = List.map Subscription.of_string classes in let batching = - Throttle.Batching.make ~delay_before:Mtime.Span.zero - ~delay_between:Mtime.Span.(50 * ms) + if List.for_all Subscription.is_task_only subs then + !Xapi_globs.event_from_task_delay + else + !Xapi_globs.event_from_delay in let session = Context.get_session_id __context in let from, from_t = @@ -718,13 +730,6 @@ let from ~__context ~classes ~token ~timeout = (Api_errors.event_from_token_parse_failure, [token]) ) in - let subs = List.map Subscription.of_string classes in - let duration = - timeout - |> Clock.Timer.s_to_span - |> Option.value ~default:Mtime.Span.(24 * hour) - in - let timer = Clock.Timer.start ~duration in (* We need to iterate because it's possible for an empty event set to be generated if we peek in-between a Modify and a Delete; we'll miss the Delete event and fail to generate the Modify because the diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index ac498c60596..8f6544be663 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1068,6 +1068,46 @@ let tgroups_enabled = ref false let xapi_requests_cgroup = "/sys/fs/cgroup/cpu/control.slice/xapi.service/request" +(* Event.{from,next} batching delays *) +let make_batching name ~delay_before ~delay_between = + let name = Printf.sprintf "%s_delay" name in + let config = ref (Throttle.Batching.make ~delay_before ~delay_between) + and config_vals = ref (delay_before, delay_between) in + let set str = + Scanf.sscanf str "%f,%f" @@ fun delay_before delay_between -> + match + (Clock.Timer.s_to_span delay_before, Clock.Timer.s_to_span delay_between) + with + | Some delay_before, Some delay_between -> + config_vals := (delay_before, delay_between) ; + config := Throttle.Batching.make ~delay_before ~delay_between + | _ -> + D.warn + "Ignoring argument '%s'. (it only allows durations of less than 104 \ + days)" + str + and get () = + let d1, d2 = !config_vals in + Printf.sprintf "%f,%f" (Clock.Timer.span_to_s d1) (Clock.Timer.span_to_s d2) + and desc = + Printf.sprintf + "delays in seconds before the API call, and between internal recursive \ + calls, separated with a comma" + in + (config, (name, Arg.String set, get, desc)) + +let event_from_delay, event_from_entry = + make_batching "event_from" ~delay_before:Mtime.Span.zero + ~delay_between:Mtime.Span.(50 * ms) + +let event_from_task_delay, event_from_task_entry = + make_batching "event_from_task" ~delay_before:Mtime.Span.zero + ~delay_between:Mtime.Span.(50 * ms) + +let event_next_delay, event_next_entry = + make_batching "event_next" ~delay_before:Mtime.Span.zero + ~delay_between:Mtime.Span.(50 * ms) + let xapi_globs_spec = [ ( "master_connection_reset_timeout" @@ -1644,6 +1684,9 @@ let other_options = , (fun () -> string_of_bool !tgroups_enabled) , "Turn on tgroups classification" ) + ; event_from_entry + ; event_from_task_entry + ; event_next_entry ] (* The options can be set with the variable xapiflags in /etc/sysconfig/xapi. From 9435eea0b425b4b187b6d1f94c0877f4ccd15360 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Sun, 21 Apr 2024 00:33:16 +0100 Subject: [PATCH 38/49] CP-49158: Event.next is deprecated: increase delays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No feature flag, because this is a deprecated API. Clients that wants the best performance should've used Event.from. Signed-off-by: Edwin Török --- ocaml/xapi/xapi_globs.ml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 8f6544be663..140f8d355e7 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1105,7 +1105,8 @@ let event_from_task_delay, event_from_task_entry = ~delay_between:Mtime.Span.(50 * ms) let event_next_delay, event_next_entry = - make_batching "event_next" ~delay_before:Mtime.Span.zero + make_batching "event_next" + ~delay_before:Mtime.Span.(200 * ms) ~delay_between:Mtime.Span.(50 * ms) let xapi_globs_spec = From 0beb5c194c1a1d23a88df81c12a9c452ae6f29a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Sun, 21 Apr 2024 00:27:20 +0100 Subject: [PATCH 39/49] CP-49158: Use exponential backoff for delay between recursive calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This delay was right after we waited for a new event, delaying all event responses by 50ms (including task completions). Eliminate the first delay, so that if we find the event we're looking after the DB update, then we can return immediately. On spurious wakeups (e.g. not the event we subscribed for) the delay is still useful, so keep it for recursive calls after the first one, and exponentially increase it up to the configured maximum. No feature flag, this is a relatively small change, and we use exponential backoffs elsewhere in XAPI already. Signed-off-by: Edwin Török --- ocaml/xapi-aux/throttle.ml | 29 ++++++++++++++++++++++------- ocaml/xapi-aux/throttle.mli | 14 ++++++++++---- ocaml/xapi/xapi_event.ml | 8 ++++---- 3 files changed, 36 insertions(+), 15 deletions(-) diff --git a/ocaml/xapi-aux/throttle.ml b/ocaml/xapi-aux/throttle.ml index 79761c1b762..c4606d7abaf 100644 --- a/ocaml/xapi-aux/throttle.ml +++ b/ocaml/xapi-aux/throttle.ml @@ -41,9 +41,20 @@ module Make (Size : SIZE) = struct end module Batching = struct - type t = {delay_before: Mtime.span; delay_between: Mtime.span} + type t = { + delay_initial: Mtime.span + ; delay_before: Mtime.span + ; delay_between: Mtime.span + } - let make ~delay_before ~delay_between = {delay_before; delay_between} + let make ~delay_before ~delay_between = + (* we are dividing, cannot overflow *) + let delay_initial = + Mtime.Span.to_float_ns delay_between /. 16. + |> Mtime.Span.of_float_ns + |> Option.get + in + {delay_initial; delay_before; delay_between} (** [perform_delay delay] calls {!val:Thread.delay} when [delay] is non-zero. @@ -55,11 +66,15 @@ module Batching = struct if Mtime.Span.is_longer delay ~than:Mtime.Span.zero then Thread.delay (Clock.Timer.span_to_s delay) - let with_recursive_loop config f arg = - let rec self arg = - perform_delay config.delay_between ; - (f [@tailcall]) self arg + let span_min a b = if Mtime.Span.is_shorter a ~than:b then a else b + + let with_recursive_loop config f = + let rec self arg input = + let arg = span_min config.delay_between Mtime.Span.(2 * arg) in + perform_delay arg ; + (f [@tailcall]) (self arg) input in + let self0 arg input = (f [@tailcall]) (self arg) input in perform_delay config.delay_before ; - f self arg + f (self0 config.delay_initial) end diff --git a/ocaml/xapi-aux/throttle.mli b/ocaml/xapi-aux/throttle.mli index fb4212b565b..7c5ca1e916c 100644 --- a/ocaml/xapi-aux/throttle.mli +++ b/ocaml/xapi-aux/throttle.mli @@ -37,18 +37,24 @@ module Batching : sig (** [with_recursive_loop config f arg] calls [f self arg], where [self] can be used for recursive calls. - A [delay_before] amount of seconds is inserted once, and [delay_between] is inserted between recursive calls: + [arg] is an argument that the implementation of [f] can change between recursive calls for its own purposes, + otherwise [()] can be used. + + A [delay_before] amount of seconds is inserted once, and [delay_between/8] is inserted between recursive calls, + except the first one, and delays increase exponentially until [delay_between] is reached {v delay_before f ... (self[@tailcall]) ... - delay_between f ... (self[@tailcall]) ... - delay_between + delay_between/8 f ... + (self[@tailcall]) ... + delay_between/4 + f ... v} - The delays are determined by [config] + The delays are determined by [config], and [delay_between] uses an exponential backoff, up to [config.delay_between] delay. *) end diff --git a/ocaml/xapi/xapi_event.ml b/ocaml/xapi/xapi_event.ml index 39d87363df0..18195d0337e 100644 --- a/ocaml/xapi/xapi_event.ml +++ b/ocaml/xapi/xapi_event.ml @@ -497,11 +497,11 @@ let rec next ~__context = in (* Like grab_range () only guarantees to return a non-empty range by blocking if necessary *) let grab_nonempty_range = - Throttle.Batching.with_recursive_loop batching @@ fun self () -> + Throttle.Batching.with_recursive_loop batching @@ fun self arg -> let last_id, end_id = grab_range () in if last_id = end_id then let (_ : int64) = wait subscription end_id in - (self [@tailcall]) () + (self [@tailcall]) arg else (last_id, end_id) in @@ -608,7 +608,7 @@ let from_inner __context session subs from from_t timer batching = let msg_gen, messages, tableset, (creates, mods, deletes, last) = with_call session subs (fun sub -> let grab_nonempty_range = - Throttle.Batching.with_recursive_loop batching @@ fun self () -> + Throttle.Batching.with_recursive_loop batching @@ fun self arg -> let ( (msg_gen, messages, _tableset, (creates, mods, deletes, last)) as result ) = @@ -627,7 +627,7 @@ let from_inner __context session subs from from_t timer batching = (* last id the client got is equivalent to the current one *) last_msg_gen := msg_gen ; wait2 sub last timer ; - (self [@tailcall]) () + (self [@tailcall]) arg ) else result in From 257af948423d50721a0e749e8e47d1776d25356e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Fri, 19 Apr 2024 00:12:15 +0100 Subject: [PATCH 40/49] CP-49158: Throttle: add Thread.yield MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Give an opportunity for more fields to be filled, e.g. when waiting for a task to complete, give a chance for the task to actually run. No feature flag, it only changes timing. Signed-off-by: Edwin Török --- dune-project | 1 + ocaml/xapi-aux/throttle.ml | 22 ++++++++++++++-------- xapi.opam | 1 + 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/dune-project b/dune-project index e69a04e745a..651c039bc22 100644 --- a/dune-project +++ b/dune-project @@ -327,6 +327,7 @@ (synopsis "The toolstack daemon which implements the XenAPI") (description "This daemon exposes the XenAPI and is used by clients such as 'xe' and 'XenCenter' to manage clusters of Xen-enabled hosts.") (depends + (ocaml (>= 4.09)) (alcotest :with-test) angstrom astring diff --git a/ocaml/xapi-aux/throttle.ml b/ocaml/xapi-aux/throttle.ml index c4606d7abaf..26eeff877d1 100644 --- a/ocaml/xapi-aux/throttle.ml +++ b/ocaml/xapi-aux/throttle.ml @@ -56,25 +56,31 @@ module Batching = struct in {delay_initial; delay_before; delay_between} + let span_min a b = if Mtime.Span.is_shorter a ~than:b then a else b + (** [perform_delay delay] calls {!val:Thread.delay} when [delay] is non-zero. Thread.delay 0 provides no fairness guarantees, the current thread may actually be the one that gets the global lock again. Instead {!val:Thread.yield} could be used, which does provide fairness guarantees, but it may also introduce large latencies - when there are lots of threads waiting for the OCaml runtime lock. + when there are lots of threads waiting for the OCaml runtime lock. Only invoke this once, in the [delay_before] section. *) - let perform_delay delay = + let perform_delay ~yield delay = if Mtime.Span.is_longer delay ~than:Mtime.Span.zero then Thread.delay (Clock.Timer.span_to_s delay) - - let span_min a b = if Mtime.Span.is_shorter a ~than:b then a else b + else if yield then + (* this is a low-priority thread, if there are any other threads waiting, then run them now. + If there are no threads waiting then this a noop. + Requires OCaml >= 4.09 (older versions had fairness issues in Thread.yield) + *) + Thread.yield () let with_recursive_loop config f = let rec self arg input = let arg = span_min config.delay_between Mtime.Span.(2 * arg) in - perform_delay arg ; + perform_delay ~yield:false arg ; (f [@tailcall]) (self arg) input in - let self0 arg input = (f [@tailcall]) (self arg) input in - perform_delay config.delay_before ; - f (self0 config.delay_initial) + let self0 input = (f [@tailcall]) (self config.delay_initial) input in + perform_delay ~yield:true config.delay_before ; + f self0 end diff --git a/xapi.opam b/xapi.opam index e9dce9e47f5..915cc192de6 100644 --- a/xapi.opam +++ b/xapi.opam @@ -10,6 +10,7 @@ homepage: "https://xapi-project.github.io/" bug-reports: "https://github.com/xapi-project/xen-api/issues" depends: [ "dune" {>= "3.15"} + "ocaml" {>= "4.09"} "alcotest" {with-test} "angstrom" "astring" From 767b3ddeca02eaf83cd77457c58c3927f7d5004d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Tue, 20 Aug 2024 17:49:24 +0100 Subject: [PATCH 41/49] CP-49141: add OCaml timeslice setter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Uses Gc.Memprof to run a hook function periodically. This checks whether we are holding any locks, and if not and sufficient time has elapsed since the last, then we yield. POSIX timers wouldn't work here, because they deliver signals, and there are too many places in XAPI that don't handle EINTR properly. Signed-off-by: Edwin Török --- ocaml/libs/timeslice/dune | 5 +++ ocaml/libs/timeslice/timeslice.ml | 72 ++++++++++++++++++++++++++++++ ocaml/libs/timeslice/timeslice.mli | 44 ++++++++++++++++++ 3 files changed, 121 insertions(+) create mode 100644 ocaml/libs/timeslice/dune create mode 100644 ocaml/libs/timeslice/timeslice.ml create mode 100644 ocaml/libs/timeslice/timeslice.mli diff --git a/ocaml/libs/timeslice/dune b/ocaml/libs/timeslice/dune new file mode 100644 index 00000000000..aa525bb84e2 --- /dev/null +++ b/ocaml/libs/timeslice/dune @@ -0,0 +1,5 @@ +(library + (name xapi_timeslice) + (package xapi) + (libraries threads.posix mtime mtime.clock.os) +) diff --git a/ocaml/libs/timeslice/timeslice.ml b/ocaml/libs/timeslice/timeslice.ml new file mode 100644 index 00000000000..c414b321d64 --- /dev/null +++ b/ocaml/libs/timeslice/timeslice.ml @@ -0,0 +1,72 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +(* avoid allocating an extra option every time *) +let invalid_holder = -1 + +let last_lock_holder = Atomic.make invalid_holder + +let me () = Thread.self () |> Thread.id + +let lock_acquired () = + (* these need to be very low overhead, so just keep track of the last lock holder, + i.e. track only one high-priority lock at a time + *) + Atomic.set last_lock_holder (me ()) + +let lock_released () = Atomic.set last_lock_holder invalid_holder + +let[@inline always] am_i_holding_locks () = + let last = Atomic.get last_lock_holder in + last <> invalid_holder && last = me () + +let yield_interval = Atomic.make Mtime.Span.zero + +(* TODO: use bechamel.monotonic-clock instead, which has lower overhead, + but not in the right place in xs-opam yet +*) +let last_yield = Atomic.make (Mtime_clock.counter ()) + +let failures = Atomic.make 0 + +let periodic_hook (_ : Gc.Memprof.allocation) = + let () = + try + if not (am_i_holding_locks ()) then + let elapsed = Mtime_clock.count (Atomic.get last_yield) in + if Mtime.Span.compare elapsed (Atomic.get yield_interval) > 0 then ( + let now = Mtime_clock.counter () in + Atomic.set last_yield now ; Thread.yield () + ) + with _ -> + (* It is not safe to raise exceptions here, it'd require changing all code to be safe to asynchronous interrupts/exceptions, + see https://guillaume.munch.name/software/ocaml/memprof-limits/index.html#isolation + Because this is just a performance optimization, we fall back to safe behaviour: do nothing, and just keep track that we failed + *) + Atomic.incr failures + in + None + +let periodic = + Gc.Memprof. + {null_tracker with alloc_minor= periodic_hook; alloc_major= periodic_hook} + +let set ?(sampling_rate = 1e-4) interval = + Atomic.set yield_interval + (Mtime.Span.of_float_ns @@ (interval *. 1e9) |> Option.get) ; + Gc.Memprof.start ~sampling_rate ~callstack_size:0 periodic + +let clear () = + Gc.Memprof.stop () ; + Atomic.set yield_interval Mtime.Span.zero diff --git a/ocaml/libs/timeslice/timeslice.mli b/ocaml/libs/timeslice/timeslice.mli new file mode 100644 index 00000000000..8fa54677b38 --- /dev/null +++ b/ocaml/libs/timeslice/timeslice.mli @@ -0,0 +1,44 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val set : ?sampling_rate:float -> float -> unit +(** [set ?sampling_rate interval] calls [Thread.yield ()] at most [interval] seconds. + + The implementation of [Thread.yield] guarantees since OCaml 4.09 that we'll switch to a different OCaml thread, + if one exists that is not blocked (i.e. it doesn't rely on [sched_yield] which may run the same thread again, + but uses pthread mutexes and condition variables to ensure the current thread isn't immediately runnable). + + The setting is global for the entire process, and currently uses [Gc.Memprof] to ensure that a hook function is called periodically, + although it depends on the allocation rate of the program whether it gets called at all. + + Another alternative would be to use {!val:Unix.set_itimer}, but XAPI doesn't cope with [EINTR] in a lot of places, + and POSIX interval timers rely on signals to notify of elapsed time. + + We could also have a dedicated thread that sleeps for a certain amount of time, but if it is an OCaml thread, + we'd have no guarantees it'd get scheduled often enough (and it couldn't interrupt other threads anyway, + by the time you'd be running the handler you already gave up running something else). + + It may be desirable to avoid yielding if we are currently holding a lock, see {!val:lock_acquired}, and {!val:lock_released} + to notify this module when that happens. +*) + +val clear : unit -> unit +(** [clear ()] undoes the changes made by [set]. + This is useful for testing multiple timeslices in the same program. *) + +val lock_acquired : unit -> unit +(** [lock_acquired ()] notifies about lock acquisition. *) + +val lock_released : unit -> unit +(** [lock_acquired ()] notifies about lock release. *) From 7e42f497e2a8467e4d8ff7a8d4124bb29d165924 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Tue, 20 Aug 2024 18:05:19 +0100 Subject: [PATCH 42/49] CP-52709: add timeslice configuration to all services MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit And apply on startup. Signed-off-by: Edwin Török --- ocaml/libs/timeslice/dune | 2 +- ocaml/xapi-idl/lib/dune | 1 + ocaml/xapi-idl/lib/xcp_service.ml | 26 ++++++++++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/ocaml/libs/timeslice/dune b/ocaml/libs/timeslice/dune index aa525bb84e2..5e946a66b21 100644 --- a/ocaml/libs/timeslice/dune +++ b/ocaml/libs/timeslice/dune @@ -1,5 +1,5 @@ (library (name xapi_timeslice) - (package xapi) + (package xapi-idl) (libraries threads.posix mtime mtime.clock.os) ) diff --git a/ocaml/xapi-idl/lib/dune b/ocaml/xapi-idl/lib/dune index fed65ab1257..8f0d7ca27de 100644 --- a/ocaml/xapi-idl/lib/dune +++ b/ocaml/xapi-idl/lib/dune @@ -26,6 +26,7 @@ unix uri uuidm + xapi_timeslice xapi-backtrace xapi-consts xapi-log diff --git a/ocaml/xapi-idl/lib/xcp_service.ml b/ocaml/xapi-idl/lib/xcp_service.ml index 01c65bc49fb..817825c44fe 100644 --- a/ocaml/xapi-idl/lib/xcp_service.ml +++ b/ocaml/xapi-idl/lib/xcp_service.ml @@ -163,6 +163,26 @@ let setify = in loop [] +(** How long to let an OCaml thread run, before + switching to another thread. + This needs to be as small as possible to reduce latency. + + Too small values reduce performance due to context switching overheads + + 4ms = 1/HZ in Dom0 seems like a good default, + a better value will be written by a boot time service. + *) +let timeslice = ref 0.05 + +let adjust_timeslice () = + let interval = !timeslice in + D.debug "%s: Setting timeslice to %.3fs" __FUNCTION__ interval ; + if interval >= 0.05 then + D.debug "%s: Timeslice same as or larger than OCaml's default: not setting" + __FUNCTION__ + else + Xapi_timeslice.Timeslice.set interval + let common_options = [ ( "use-switch" @@ -236,6 +256,11 @@ let common_options = , (fun () -> !config_dir) , "Location of directory containing configuration file fragments" ) + ; ( "timeslice" + , Arg.Set_float timeslice + , (fun () -> Printf.sprintf "%.3f" !timeslice) + , "timeslice in seconds" + ) ] let loglevel () = !log_level @@ -454,6 +479,7 @@ let configure_common ~options ~resources arg_parse_fn = failwith (String.concat "\n" lines) ) resources ; + adjust_timeslice () ; Sys.set_signal Sys.sigpipe Sys.Signal_ignore let configure ?(argv = Sys.argv) ?(options = []) ?(resources = []) () = From 3ad905ee989f46d02c3ba59f5ff4ad30f0588c5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Tue, 20 Aug 2024 18:32:30 +0100 Subject: [PATCH 43/49] CP-52709: add simple measurement code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Edwin Török --- ocaml/libs/timeslice/simple_measure.ml | 61 +++++++++++++++++++++++++ ocaml/libs/timeslice/simple_measure.mli | 47 +++++++++++++++++++ 2 files changed, 108 insertions(+) create mode 100644 ocaml/libs/timeslice/simple_measure.ml create mode 100644 ocaml/libs/timeslice/simple_measure.mli diff --git a/ocaml/libs/timeslice/simple_measure.ml b/ocaml/libs/timeslice/simple_measure.ml new file mode 100644 index 00000000000..1b271643f8d --- /dev/null +++ b/ocaml/libs/timeslice/simple_measure.ml @@ -0,0 +1,61 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +(** 95% confidence interval, and median value *) +type t = {low: float; median: float; high: float} + +let span_to_s s = Mtime.Span.to_float_ns s *. 1e-9 + +let ci95 measurements = + let n = Array.length measurements in + Array.sort Float.compare measurements ; + let median = measurements.(n / 2) in + (* "Performance Evaluation of Computer and Communication Systems", Table A. 1 *) + let n = float n in + let d = 0.98 *. sqrt n in + let lo = (n /. 2.) -. d |> Float.to_int + and hi = (n /. 2.) +. 1. +. d |> Float.ceil |> Float.to_int in + {low= measurements.(lo - 1); median; high= measurements.(hi - 1)} + +let measure ?(n = 1001) ?(inner = 10) f = + if n <= 70 then (* some of the formulas below are not valid for smaller [n] *) + invalid_arg (Printf.sprintf "n must be at least 70: %d" n) ; + (* warmup *) + Sys.opaque_identity (f ()) ; + + let measure_inner _ = + let m = Mtime_clock.counter () in + for _ = 1 to inner do + (* opaque_identity prevents the call from being optimized away *) + Sys.opaque_identity (f ()) + done ; + let elapsed = Mtime_clock.count m in + span_to_s elapsed /. float inner + in + let measurements = Array.init n measure_inner in + ci95 measurements + +let measure_min ?(n = 1001) f arg = + (* warmup *) + Sys.opaque_identity (f arg) ; + let measure_one _ = + let m = Mtime_clock.counter () in + Sys.opaque_identity (f arg) ; + let elapsed = Mtime_clock.count m in + span_to_s elapsed + in + Seq.ints 0 + |> Seq.take n + |> Seq.map measure_one + |> Seq.fold_left Float.min Float.max_float diff --git a/ocaml/libs/timeslice/simple_measure.mli b/ocaml/libs/timeslice/simple_measure.mli new file mode 100644 index 00000000000..76183a948e7 --- /dev/null +++ b/ocaml/libs/timeslice/simple_measure.mli @@ -0,0 +1,47 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +(** Measure the speed of an operation in a very simple and robust way. + More detailed measurements can be dune using [Bechamel]. +*) + +(** 95% confidence interval, and median value *) +type t = {low: float; median: float; high: float} + +val measure : ?n:int -> ?inner:int -> (unit -> unit) -> t +(** [measure ?n ?inner f] measures [n] times the duration of [inner] iterations of [f ()]. + + Returns the median of the inner measurements, and a 95% confidence interval. + The median is used, because it makes no assumptions about the distribution of the samples, + i.e. it doesn't require a normal (Gaussian) distribution. + + The inner measurements use a simple average, because we only know the duration of [inner] iterations, + not the duration of each individual call to [f ()]. + The purpose of the [inner] iterations is to reduce measurement overhead. + + @param n iteration count for the outer loop, must be more than [70]. + @param n iteration count for the inner loop + @param f function to measure + + @raises Invalid_argument if [n<70] + *) + +val measure_min : ?n:int -> ('a -> unit) -> 'a -> float +(** [measure_min ?n:int f arg] is the minimum amount of time that [f arg] takes. + + This should be used when we try to measure the maximum speed of some operation (e.g. cached memory accesses), + while ignoring latencies/hickups introduced by other processes on the system. + + It shouldn't be used for measuring the overhead of an operation, because the hickups may be part of that overhead. + *) From 38e1ad88acc3e78187ef3e3e98012634da8c655c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Tue, 20 Aug 2024 18:52:12 +0100 Subject: [PATCH 44/49] CP-52709: recommended measurement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Edwin Török --- ocaml/libs/timeslice/dune | 2 +- ocaml/libs/timeslice/recommended.ml | 48 ++++++++++++++++++++++++++++ ocaml/libs/timeslice/recommended.mli | 22 +++++++++++++ 3 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 ocaml/libs/timeslice/recommended.ml create mode 100644 ocaml/libs/timeslice/recommended.mli diff --git a/ocaml/libs/timeslice/dune b/ocaml/libs/timeslice/dune index 5e946a66b21..94eff6b3a39 100644 --- a/ocaml/libs/timeslice/dune +++ b/ocaml/libs/timeslice/dune @@ -1,5 +1,5 @@ (library (name xapi_timeslice) (package xapi-idl) - (libraries threads.posix mtime mtime.clock.os) + (libraries threads.posix mtime mtime.clock.os xapi-log) ) diff --git a/ocaml/libs/timeslice/recommended.ml b/ocaml/libs/timeslice/recommended.ml new file mode 100644 index 00000000000..e57af54ed3f --- /dev/null +++ b/ocaml/libs/timeslice/recommended.ml @@ -0,0 +1,48 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module D = Debug.Make (struct let name = "timeslice_recommended" end) + +let yield_stop = Atomic.make false + +let yield_worker () = + while not (Atomic.get yield_stop) do + Thread.yield () + done + +let yield_overhead () = + (* Thread.yield only has an effect if another thread exists, + so create one that yields back immediately *) + D.debug "Measuring Thread.yield overhead" ; + Atomic.set yield_stop false ; + let t = Thread.create yield_worker () in + let measured = Simple_measure.measure Thread.yield in + D.debug "Thread.yield overhead: %.6fs <= %.6fs <= %.6fs" measured.low + measured.median measured.high ; + D.debug "Waiting for worker thread to stop" ; + Atomic.set yield_stop true ; + Thread.join t ; + measured.median + +let measure ?(max_overhead_percentage = 1.0) () = + let overhead = yield_overhead () in + let interval = overhead /. (max_overhead_percentage /. 100.) in + D.debug "Recommended timeslice interval = %.4fs" interval ; + (* Avoid too high or too low intervals: + do not go below 1ms (our HZ is 250, and max is 1000, the kernel would round up anyway) + do not go above 50ms (the current default in OCaml 4.14) + *) + let interval = interval |> Float.max 0.001 |> Float.min 0.050 in + D.debug "Final recommeded timeslice interval = %.4fs" interval ; + interval diff --git a/ocaml/libs/timeslice/recommended.mli b/ocaml/libs/timeslice/recommended.mli new file mode 100644 index 00000000000..6d902419345 --- /dev/null +++ b/ocaml/libs/timeslice/recommended.mli @@ -0,0 +1,22 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val measure : ?max_overhead_percentage:float -> unit -> float +(** [measure ?max_overhead_percentage ()] returns the recommended timeslice for the current system. + + The returned value should be used in a call to {!val:Timeslice.set}. + + @param max_overhead_percentage default 1% + @returns [interval] such that [overhead / interval <= max_overhead_percentage / 100] + *) From 93f85be04aabfe0fb97865c599bb5d3e79e5ab13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 22 Aug 2024 17:42:42 +0100 Subject: [PATCH 45/49] CP-52709: Enable timeslice setting during unit tests by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Edwin Török --- ocaml/tests/common/dune | 1 + ocaml/tests/common/suite_init.ml | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ocaml/tests/common/dune b/ocaml/tests/common/dune index 29acca3d2cb..a8ab57a4a23 100644 --- a/ocaml/tests/common/dune +++ b/ocaml/tests/common/dune @@ -28,6 +28,7 @@ xapi-stdext-date xapi-stdext-threads.scheduler xapi-stdext-unix + xapi_timeslice ) ) diff --git a/ocaml/tests/common/suite_init.ml b/ocaml/tests/common/suite_init.ml index e63deae17b5..adb9c501e88 100644 --- a/ocaml/tests/common/suite_init.ml +++ b/ocaml/tests/common/suite_init.ml @@ -11,4 +11,6 @@ let harness_init () = Filename.concat Test_common.working_area "xapi-inventory" ; Xcp_client.use_switch := false ; Pool_role.set_pool_role_for_test () ; - Message_forwarding.register_callback_fns () + Message_forwarding.register_callback_fns () ; + (* for unit tests use a fixed value *) + Xapi_timeslice.Timeslice.set 0.004 From a4545489bde02abeef38766e73e70410fcde76ef Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Wed, 4 Dec 2024 08:16:43 +0000 Subject: [PATCH 46/49] CP-52320: Improve xapi thread classification Adds more granurality in the xapi thread classification. Now, an individual thread can be mapped to the following: - {xapi_cgroup}/internal/SM; - {xapi_cgroup}/internal/cli; - {xapi_cgroup}/external/intrapool; - {xapi_cgroup}/external/unauthenticated; - {xapi_cgroup}/external/authenticated/{identity}; where {identity} is a {auth_user_sid}/{user_agent}. Both {auth_user_sid} and {user_agent} strings are sanitized when making the identity through `Identity.make`, by allowing only alphanumenric characters and a maximum length of 16 characters each. This is only the library change. This should allow for proper thread classification in xapi. Signed-off-by: Gabriel Buica --- ocaml/libs/tgroup/dune | 2 +- ocaml/libs/tgroup/tgroup.ml | 255 +++++++++++++++++++++++++++-------- ocaml/libs/tgroup/tgroup.mli | 55 ++++++-- 3 files changed, 242 insertions(+), 70 deletions(-) diff --git a/ocaml/libs/tgroup/dune b/ocaml/libs/tgroup/dune index 40b75ad1bbd..f845c49b2ab 100644 --- a/ocaml/libs/tgroup/dune +++ b/ocaml/libs/tgroup/dune @@ -1,4 +1,4 @@ (library (name tgroup) (public_name tgroup) - (libraries xapi-log xapi-stdext-unix)) + (libraries xapi-log xapi-stdext-unix xapi-stdext-std)) diff --git a/ocaml/libs/tgroup/tgroup.ml b/ocaml/libs/tgroup/tgroup.ml index a0639974670..171b78ee2b2 100644 --- a/ocaml/libs/tgroup/tgroup.ml +++ b/ocaml/libs/tgroup/tgroup.ml @@ -29,12 +29,24 @@ module Group = struct type t let name = "external" - end - module Host = struct - type t + module Intrapool = struct + type t + + let name = "intrapool" + end + + module Authenticated = struct + type t = string - let name = "host" + let name = "authenticated" + end + + module Unauthenticated = struct + type t + + let name = "unauthenticated" + end end module SM = struct @@ -43,73 +55,204 @@ module Group = struct let name = "SM" end + module CLI = struct + type t + + let name = "cli" + end + + module Identity = struct + type t = {user_agent: string option; subject_sid: string} + + let is_alphanum = function + | '0' .. '9' | 'a' .. 'z' | 'A' .. 'Z' -> + true + | _ -> + false + + let sanitize s = + Xapi_stdext_std.Xstringext.String.filter_chars s is_alphanum + + let make ?user_agent subject_sid = + let user_agent = + user_agent + |> Option.map sanitize + |> Option.map (fun user_agent -> + let len = Int.min (String.length user_agent) 16 in + String.sub user_agent 0 len + ) + in + + let user_agent = if user_agent = Some "" then None else user_agent in + let subject_sid = + if subject_sid = "" then "root" else sanitize subject_sid + in + {user_agent; subject_sid} + + let to_string i = + match i.user_agent with + | Some user_agent -> + i.subject_sid // user_agent + | None -> + i.subject_sid + + let root_identity = make "root" + end + type _ group = - | Internal_Host_SM : (Internal.t * Host.t * SM.t) group - | EXTERNAL : External.t group + | Internal_SM : (Internal.t * SM.t) group + | Internal_CLI : (Internal.t * CLI.t) group + | External_Intrapool : (External.t * External.Intrapool.t) group + | External_Authenticated : + Identity.t + -> (External.t * External.Authenticated.t) group + | External_Unauthenticated : (External.t * External.Unauthenticated.t) group type t = Group : 'a group -> t - let all = [Group Internal_Host_SM; Group EXTERNAL] + let all = + [ + Group Internal_SM + ; Group Internal_CLI + ; Group External_Intrapool + ; Group (External_Authenticated Identity.root_identity) + ; Group External_Unauthenticated + ] + + module Endpoint = struct type t = Internal | External end + + module Kind = struct + type t = Intrapool | Authenticated of Identity.t | Unautheticated + + let to_string = function + | Intrapool -> + External.Intrapool.name + | Authenticated identity -> + External.Authenticated.name // Identity.to_string identity + | Unautheticated -> + External.Unauthenticated.name + end module Originator = struct - type t = Internal_Host_SM | EXTERNAL + type t = Internal_SM | Internal_CLI | External let of_string = function | s when String.equal (String.lowercase_ascii SM.name) (String.lowercase_ascii s) -> - Internal_Host_SM + Internal_SM | s when String.equal - (String.lowercase_ascii External.name) + (String.lowercase_ascii CLI.name) (String.lowercase_ascii s) -> - EXTERNAL + Internal_CLI | _ -> - EXTERNAL + External let to_string = function - | Internal_Host_SM -> + | Internal_SM -> SM.name - | EXTERNAL -> + | Internal_CLI -> + CLI.name + | External -> External.name end module Creator = struct - type t = { - user: string option - ; endpoint: string option - ; originator: Originator.t - } - - let make ?user ?endpoint originator = {originator; user; endpoint} + type t = {endpoint: Endpoint.t; kind: Kind.t; originator: Originator.t} + + let make ?(intrapool = false) ?(endpoint = Endpoint.External) ?identity + ?originator () = + match (intrapool, endpoint, identity, originator) with + | true, _, _, _ -> + { + endpoint= Endpoint.External + ; kind= Kind.Intrapool + ; originator= Originator.External + } + | false, Endpoint.Internal, _, Some originator -> + { + endpoint= Endpoint.Internal + ; kind= Kind.Authenticated Identity.root_identity + ; originator + } + | false, Endpoint.Internal, _, None -> + { + endpoint= Endpoint.External + ; kind= Kind.Authenticated Identity.root_identity + ; originator= Originator.External + } + | false, Endpoint.External, Some identity, _ -> + { + endpoint= Endpoint.External + ; kind= Kind.Authenticated identity + ; originator= Originator.External + } + | false, Endpoint.External, None, _ -> + { + endpoint= Endpoint.External + ; kind= Kind.Unautheticated + ; originator= Originator.External + } + + let default_creator = + { + endpoint= Endpoint.External + ; kind= Kind.Authenticated Identity.root_identity + ; originator= Originator.External + } let to_string c = - Printf.sprintf "Creator -> user:%s endpoint:%s originator:%s" - (Option.value c.user ~default:"") - (Option.value c.endpoint ~default:"") + Printf.sprintf "Creator -> kind:%s originator:%s" (Kind.to_string c.kind) (Originator.to_string c.originator) end - let of_originator = function - | Originator.Internal_Host_SM -> - Group Internal_Host_SM - | Originator.EXTERNAL -> - Group EXTERNAL - let get_originator = function - | Group Internal_Host_SM -> - Originator.Internal_Host_SM - | Group EXTERNAL -> - Originator.EXTERNAL - - let of_creator creator = of_originator creator.Creator.originator + | Group Internal_SM -> + Originator.Internal_SM + | Group Internal_CLI -> + Originator.Internal_CLI + | _ -> + Originator.External + + let of_creator creator = + match + ( creator.Creator.endpoint + , creator.Creator.originator + , creator.Creator.kind + ) + with + | _, _, Intrapool -> + Group External_Intrapool + | Endpoint.Internal, Internal_SM, _ -> + Group Internal_SM + | Endpoint.Internal, Internal_CLI, _ -> + Group Internal_CLI + | Endpoint.External, Internal_CLI, Authenticated identity + | Endpoint.External, Internal_SM, Authenticated identity + | _, External, Authenticated identity -> + Group (External_Authenticated identity) + | Endpoint.External, Internal_CLI, Unautheticated + | Endpoint.External, Internal_SM, Unautheticated + | _, External, Unautheticated -> + Group External_Unauthenticated let to_cgroup : type a. a group -> string = function - | Internal_Host_SM -> - Internal.name // Host.name // SM.name - | EXTERNAL -> + | Internal_SM -> + Internal.name // SM.name + | Internal_CLI -> + Internal.name // CLI.name + | External_Authenticated identity -> External.name + // External.Authenticated.name + // Identity.to_string identity + | External_Intrapool -> + External.name // External.Intrapool.name + | External_Unauthenticated -> + External.name // External.Unauthenticated.name + + let to_string g = match g with Group group -> to_cgroup group end module Cgroup = struct @@ -124,6 +267,10 @@ module Cgroup = struct (fun dir -> dir // Group.to_cgroup group) (Atomic.get cgroup_dir) + let with_dir dir f arg = + Xapi_stdext_unix.Unixext.mkdir_rec dir 0o755 ; + f arg + let write_cur_tid_to_cgroup_file filename = try let perms = 0o640 in @@ -146,39 +293,35 @@ module Cgroup = struct Option.iter (fun dir -> let tasks_file = dir // "tasks" in - write_cur_tid_to_cgroup_file tasks_file + with_dir dir write_cur_tid_to_cgroup_file tasks_file ) (dir_of group) - let set_cur_cgroup ~originator = - match originator with - | Group.Originator.Internal_Host_SM -> - attach_task (Group Internal_Host_SM) - | Group.Originator.EXTERNAL -> - attach_task (Group EXTERNAL) + let set_cur_cgroup ~creator = attach_task (Group.of_creator creator) - let set_cgroup creator = - set_cur_cgroup ~originator:creator.Group.Creator.originator + let set_cgroup creator = set_cur_cgroup ~creator let init dir = let () = Atomic.set cgroup_dir (Some dir) in Group.all |> List.filter_map dir_of - |> List.iter (fun dir -> Xapi_stdext_unix.Unixext.mkdir_rec dir 0o755) ; - set_cur_cgroup ~originator:Group.Originator.EXTERNAL + |> List.iter (fun dir -> with_dir dir debug "created cgroup for: %s" dir) ; + set_cur_cgroup ~creator:Group.Creator.default_creator end -let of_originator originator = - originator |> Group.Creator.make |> Cgroup.set_cgroup - let of_req_originator originator = Option.iter (fun _ -> try originator - |> Option.value ~default:Group.Originator.(to_string EXTERNAL) - |> Group.Originator.of_string - |> of_originator + |> Option.iter (fun originator -> + let originator = Group.Originator.of_string originator in + Group.Creator.make ~endpoint:Group.Endpoint.Internal ~originator + () + |> Cgroup.set_cgroup + ) with _ -> () ) (Atomic.get Cgroup.cgroup_dir) + +let of_creator creator = creator |> Cgroup.set_cgroup diff --git a/ocaml/libs/tgroup/tgroup.mli b/ocaml/libs/tgroup/tgroup.mli index e1d5c7f0b85..b9316967ae3 100644 --- a/ocaml/libs/tgroup/tgroup.mli +++ b/ocaml/libs/tgroup/tgroup.mli @@ -16,29 +16,52 @@ threads.*) module Group : sig (** Abstract type that represents a group of execution threads in xapi. Each - group corresponds to a Creator, and has a designated level of priority.*) + group corresponds to a Creator, and has a designated level of priority.*) type t + (** Data structures that represents the identity *) + module Identity : sig + type t + + val root_identity : t + + val make : ?user_agent:string -> string -> t + + val to_string : t -> string + end + (** Generic representation of different xapi threads originators. *) module Originator : sig (** Type that represents different originators of xapi threads. *) - type t = Internal_Host_SM | EXTERNAL + type t = Internal_SM | Internal_CLI | External val of_string : string -> t (** [of_string s] creates an originator from a string [s]. - - e.g create an originator based on a http header. *) + + e.g create an originator based on a http header. *) val to_string : t -> string (** [to_string o] converts an originator [o] to its string representation.*) end + (** Generic representation of different xapi threads origin endpoints. *) + module Endpoint : sig + (** Type that represents different origin endpoints of xapi threads. *) + type t = Internal | External + end + (** Generic representation of different xapi threads creators. *) module Creator : sig (** Abstract type that represents different creators of xapi threads.*) type t - val make : ?user:string -> ?endpoint:string -> Originator.t -> t + val make : + ?intrapool:bool + -> ?endpoint:Endpoint.t + -> ?identity:Identity.t + -> ?originator:Originator.t + -> unit + -> t (** [make o] creates a creator type based on a given originator [o].*) val to_string : t -> string @@ -50,29 +73,35 @@ module Group : sig val of_creator : Creator.t -> t (** [of_creator c] returns the corresponding group based on the creator [c].*) + + val to_string : t -> string + (** [to_string g] returns the string representation of the group [g].*) end (** [Cgroup] module encapsulates different function for managing the cgroups -corresponding with [Groups].*) + corresponding with [Groups].*) module Cgroup : sig (** Represents one of the children of the cgroup directory.*) type t = string val dir_of : Group.t -> t option (** [dir_of group] returns the full path of the cgroup directory corresponding - to the group [group] as [Some dir]. - - Returns [None] if [init dir] has not been called. *) + to the group [group] as [Some dir]. + + Returns [None] if [init dir] has not been called. *) val init : string -> unit (** [init dir] initializes the hierachy of cgroups associated to all [Group.t] - types under the directory [dir].*) + types under the directory [dir].*) val set_cgroup : Group.Creator.t -> unit (** [set_cgroup c] sets the current xapi thread in a cgroup based on the - creator [c].*) + creator [c].*) end +val of_creator : Group.Creator.t -> unit +(** [of_creator g] classifies the current thread based based on the creator [c].*) + val of_req_originator : string option -> unit -(** [of_req_originator o] same as [of_originator] but it classifies based on the -http request header.*) +(** [of_req_originator o] same as [of_creator] but it classifies based on the + http request header.*) From 76c8556587e7faab29f8525988938fb91ae8522f Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Wed, 4 Dec 2024 08:36:17 +0000 Subject: [PATCH 47/49] CP-52320 & CP-52795: Add unit tests for tgroup library Adds unit test for: - the correct thread classification of `of_creator`; - sanitation of `Identity.make`. Signed-off-by: Gabriel Buica --- ocaml/libs/tgroup/dune | 7 +++ ocaml/libs/tgroup/test_tgroup.ml | 83 +++++++++++++++++++++++++++++++ ocaml/libs/tgroup/test_tgroup.mli | 0 3 files changed, 90 insertions(+) create mode 100644 ocaml/libs/tgroup/test_tgroup.ml create mode 100644 ocaml/libs/tgroup/test_tgroup.mli diff --git a/ocaml/libs/tgroup/dune b/ocaml/libs/tgroup/dune index f845c49b2ab..cff00ee1157 100644 --- a/ocaml/libs/tgroup/dune +++ b/ocaml/libs/tgroup/dune @@ -1,4 +1,11 @@ (library (name tgroup) + (modules tgroup) (public_name tgroup) (libraries xapi-log xapi-stdext-unix xapi-stdext-std)) + +(test + (name test_tgroup) + (modules test_tgroup) + (package tgroup) + (libraries tgroup alcotest xapi-log)) diff --git a/ocaml/libs/tgroup/test_tgroup.ml b/ocaml/libs/tgroup/test_tgroup.ml new file mode 100644 index 00000000000..7623a0c01ee --- /dev/null +++ b/ocaml/libs/tgroup/test_tgroup.ml @@ -0,0 +1,83 @@ +module D = Debug.Make (struct let name = __MODULE__ end) + +let test_identity () = + let specs = + [ + ((Some "XenCenter2024", "u1000"), "u1000/XenCenter2024") + ; ((None, "u1001"), "u1001") + ; ((None, "Special!@#"), "Special") + ; ((Some "With-Hyphen", "123"), "123/WithHyphen") + ; ((Some "", ""), "root") + ; ((Some " Xen Center 2024 ", ", u 1000 "), "u1000/XenCenter2024") + ; ((Some "Xen Center ,/@.~# 2024", "root"), "root/XenCenter2024") + ; ((Some "XenCenter 2024.3.18", ""), "root/XenCenter2024318") + ; ((Some "", "S-R-X-Y1-Y2-Yn-1-Yn"), "SRXY1Y2Yn1Yn") + ; ( (Some "XenCenter2024", "S-R-X-Y1-Y2-Yn-1-Yn") + , "SRXY1Y2Yn1Yn/XenCenter2024" + ) + ] + in + + let test_make ((user_agent, subject_sid), expected_identity) = + let actual_identity = + Tgroup.Group.Identity.(make ?user_agent subject_sid |> to_string) + in + Alcotest.(check string) + "Check expected identity" expected_identity actual_identity + in + List.iter test_make specs + +let test_of_creator () = + let dummy_identity = + Tgroup.Group.Identity.make ~user_agent:"XenCenter2024" "root" + in + let specs = + [ + ((None, None, None, None), "external/unauthenticated") + ; ((Some true, None, None, None), "external/intrapool") + ; ( ( Some true + , Some Tgroup.Group.Endpoint.External + , Some dummy_identity + , Some "sm" + ) + , "external/intrapool" + ) + ; ( ( Some true + , Some Tgroup.Group.Endpoint.Internal + , Some dummy_identity + , Some "sm" + ) + , "external/intrapool" + ) + ; ( ( None + , Some Tgroup.Group.Endpoint.Internal + , Some dummy_identity + , Some "cli" + ) + , "internal/cli" + ) + ; ( (None, None, Some dummy_identity, Some "sm") + , "external/authenticated/root/XenCenter2024" + ) + ] + in + let test_make ((intrapool, endpoint, identity, originator), expected_group) = + let originator = Option.map Tgroup.Group.Originator.of_string originator in + let actual_group = + Tgroup.Group.( + Creator.make ?intrapool ?endpoint ?identity ?originator () + |> of_creator + |> to_string + ) + in + Alcotest.(check string) "Check expected group" expected_group actual_group + in + List.iter test_make specs + +let tests = + [ + ("identity make", `Quick, test_identity) + ; ("group of creator", `Quick, test_of_creator) + ] + +let () = Alcotest.run "Tgroup library" [("Thread classification", tests)] diff --git a/ocaml/libs/tgroup/test_tgroup.mli b/ocaml/libs/tgroup/test_tgroup.mli new file mode 100644 index 00000000000..e69de29bb2d From 63391ba13c33491cb7950c89508d4dfef7d2bec3 Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Wed, 4 Dec 2024 08:45:10 +0000 Subject: [PATCH 48/49] CP-52320 & CP-52743: Classify xapi threads. Classifies the threads at the time of session creation and inside `do_dispatch`. This ensures that new threads created by current session/request inherit the propper classification. Note: threads created by xenopsd calling back into xapi are yet to be classified. Signed-off-by: Gabriel Buica --- ocaml/xapi/dune | 1 + ocaml/xapi/server_helpers.ml | 13 +++++++++++++ ocaml/xapi/xapi_session.ml | 9 +++++++++ ocaml/xe-cli/newcli.ml | 1 + 4 files changed, 24 insertions(+) diff --git a/ocaml/xapi/dune b/ocaml/xapi/dune index 048bd4963f9..810fbe71e8d 100644 --- a/ocaml/xapi/dune +++ b/ocaml/xapi/dune @@ -78,6 +78,7 @@ sexplib0 sexplib sexpr + tgroup forkexec xapi-idl xapi_aux diff --git a/ocaml/xapi/server_helpers.ml b/ocaml/xapi/server_helpers.ml index e4952769c2f..65032773625 100644 --- a/ocaml/xapi/server_helpers.ml +++ b/ocaml/xapi/server_helpers.ml @@ -133,6 +133,19 @@ let do_dispatch ?session_id ?forward_op ?self:_ supports_async called_fn_name Context.of_http_req ?session_id ~internal_async_subtask ~generate_task_for ~supports_async ~label ~http_req ~fd () in + let identity = + try + Option.map + (fun session_id -> + let subject = + Db.Session.get_auth_user_sid ~__context ~self:session_id + in + Tgroup.Group.Identity.make ?user_agent:http_req.user_agent subject + ) + session_id + with _ -> None + in + Tgroup.of_creator (Tgroup.Group.Creator.make ?identity ()) ; let sync () = let need_complete = not (Context.forwarded_task __context) in exec_with_context ~__context ~need_complete ~called_async diff --git a/ocaml/xapi/xapi_session.ml b/ocaml/xapi/xapi_session.ml index 7e77def1f43..95d310a085e 100644 --- a/ocaml/xapi/xapi_session.ml +++ b/ocaml/xapi/xapi_session.ml @@ -686,6 +686,7 @@ let consider_touching_session rpc session_id = (* Make sure the pool secret matches *) let slave_login_common ~__context ~host_str ~psecret = Context.with_tracing ~__context __FUNCTION__ @@ fun __context -> + Tgroup.of_creator (Tgroup.Group.Creator.make ~intrapool:true ()) ; if not (Helpers.PoolSecret.is_authorized psecret) then ( let msg = "Pool credentials invalid" in debug "Failed to authenticate slave %s: %s" host_str msg ; @@ -881,6 +882,8 @@ let login_with_password ~__context ~uname ~pwd ~version:_ ~originator = | Some `root -> (* in this case, the context origin of this login request is a unix socket bound locally to a filename *) (* we trust requests from local unix filename sockets, so no need to authenticate them before login *) + Tgroup.of_creator + Tgroup.Group.(Creator.make ~identity:Identity.root_identity ()) ; login_no_password_common ~__context ~uname:(Some uname) ~originator ~host:(Helpers.get_localhost ~__context) ~pool:false ~is_local_superuser:true ~subject:Ref.null ~auth_user_sid:"" @@ -929,6 +932,8 @@ let login_with_password ~__context ~uname ~pwd ~version:_ ~originator = do_local_auth uname pwd ; debug "Success: local auth, user %s from %s" uname (Context.get_origin __context) ; + Tgroup.of_creator + Tgroup.Group.(Creator.make ~identity:Identity.root_identity ()) ; login_no_password_common ~__context ~uname:(Some uname) ~originator ~host:(Helpers.get_localhost ~__context) ~pool:false ~is_local_superuser:true ~subject:Ref.null @@ -1224,6 +1229,10 @@ let login_with_password ~__context ~uname ~pwd ~version:_ ~originator = Caching.memoize ~__context uname pwd ~slow_path:query_external_auth in + Tgroup.of_creator + Tgroup.Group.( + Creator.make ~identity:(Identity.make subject_identifier) () + ) ; login_no_password_common ~__context ~uname:(Some uname) ~originator ~host:(Helpers.get_localhost ~__context) diff --git a/ocaml/xe-cli/newcli.ml b/ocaml/xe-cli/newcli.ml index 56279d6a324..c624eddec5a 100644 --- a/ocaml/xe-cli/newcli.ml +++ b/ocaml/xe-cli/newcli.ml @@ -816,6 +816,7 @@ let main () = in let args = String.concat "\n" args in Printf.fprintf oc "User-agent: xe-cli/Unix/%d.%d\r\n" major minor ; + Printf.fprintf oc "originator: cli\r\n" ; Option.iter (Printf.fprintf oc "traceparent: %s\r\n") traceparent ; Printf.fprintf oc "content-length: %d\r\n\r\n" (String.length args) ; Printf.fprintf oc "%s" args ; From 77147a3e6e7afb372b4afda11cd14d6d3808112d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Tue, 7 Jan 2025 17:14:07 +0000 Subject: [PATCH 49/49] CP-51692: Do not enable Event.next ratelimiting if Event.next is still used internally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By default Event.next is still used internally, so although this API is deprecated do not yet enable the throttling by default. Fixes: 3e1d8a27a7 ("CP-51692: Event.next: use same batching as Event.from") Fixes: 2b4e0db649 ("CP-49158: [prep] Event.{from,next}: make delays configurable and prepare for task specific delays") It slows down all synchronous API calls that create tasks, like VM.start. Only enable the throttling when Event.next is not used internally (`use-event-next = false` in xapi.conf), which will eventually become the default. The code prior to the above changes used 0 delay between checking for events, so do the same here (although this lead to a lot of inefficient wakeups of all active tasks in XAPI, whenever anything changes, it matches previous behaviour) Signed-off-by: Edwin Török --- ocaml/xapi/xapi_event.ml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ocaml/xapi/xapi_event.ml b/ocaml/xapi/xapi_event.ml index e55c98cc4db..a7412790019 100644 --- a/ocaml/xapi/xapi_event.ml +++ b/ocaml/xapi/xapi_event.ml @@ -476,7 +476,13 @@ let unregister ~__context ~classes = (** Blocking call which returns the next set of events relevant to this session. *) let rec next ~__context = - let batching = !Xapi_globs.event_next_delay in + let batching = + if !Constants.use_event_next then + Throttle.Batching.make ~delay_before:Mtime.Span.zero + ~delay_between:Mtime.Span.zero + else + !Xapi_globs.event_next_delay + in let session = Context.get_session_id __context in let open Next in assert_subscribed session ;