From e8f9090630d3f9ac670980d635b92c52b28caccd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Mon, 29 Apr 2024 14:39:16 +0100 Subject: [PATCH 1/6] tracing: add missing locks on read MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is not safe to access a global hashtable from multiple threads, even if the operations are read-only (it may be concurrently changed by another thread, which then may result in errors in the racing thread). This means we must always take the mutex, and because OCaml doesn't have a reader-writer mutex, we need to take the exclusive mutex. Eventually we should use a better datastructure here (immutable maps, or lock-free datastructures), but for now fix the datastructure that we currently use to be thread-safe. Signed-off-by: Edwin Török --- ocaml/libs/tracing/tracing.ml | 62 ++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/ocaml/libs/tracing/tracing.ml b/ocaml/libs/tracing/tracing.ml index 6e1ed32810a..7d8218d393b 100644 --- a/ocaml/libs/tracing/tracing.ml +++ b/ocaml/libs/tracing/tracing.ml @@ -296,7 +296,10 @@ module Spans = struct let spans = Hashtbl.create 100 - let span_count () = Hashtbl.length spans + let span_count () = + Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> + Hashtbl.length spans + ) let max_spans = ref 1000 @@ -308,9 +311,15 @@ module Spans = struct let finished_spans = Hashtbl.create 100 - let span_hashtbl_is_empty () = Hashtbl.length spans = 0 + let span_hashtbl_is_empty () = + Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> + Hashtbl.length spans = 0 + ) - let finished_span_hashtbl_is_empty () = Hashtbl.length finished_spans = 0 + let finished_span_hashtbl_is_empty () = + Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> + Hashtbl.length finished_spans = 0 + ) let add_to_spans ~(span : Span.t) = let key = span.context.trace_id in @@ -373,13 +382,14 @@ module Spans = struct match x with | None -> false - | Some (span : Span.t) -> ( - match Hashtbl.find_opt finished_spans span.context.trace_id with - | None -> - false - | Some span_list -> - List.exists (fun x -> x = span) span_list - ) + | Some (span : Span.t) -> + Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> + match Hashtbl.find_opt finished_spans span.context.trace_id with + | None -> + false + | Some span_list -> + List.exists (fun x -> x = span) span_list + ) (** since copies the existing finished spans and then clears the existing spans as to only export them once *) let since () = @@ -389,7 +399,10 @@ module Spans = struct copy ) - let dump () = Hashtbl.(copy spans, Hashtbl.copy finished_spans) + let dump () = + Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> + Hashtbl.(copy spans, Hashtbl.copy finished_spans) + ) module GC = struct let lock = Mutex.create () @@ -538,9 +551,12 @@ let lock = Mutex.create () let tracer_providers = Hashtbl.create 100 -let get_tracer_providers () = +let get_tracer_providers_unlocked () = Hashtbl.fold (fun _ provider acc -> provider :: acc) tracer_providers [] +let get_tracer_providers () = + Xapi_stdext_threads.Threadext.Mutex.execute lock get_tracer_providers_unlocked + let set ?enabled ?attributes ?endpoints ~uuid () = Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> let provider = @@ -561,17 +577,17 @@ let set ?enabled ?attributes ?endpoints ~uuid () = failwith (Printf.sprintf "The TracerProvider : %s does not exist" uuid) in - Hashtbl.replace tracer_providers uuid provider - ) ; - if - List.for_all - (fun provider -> not provider.TracerProvider.enabled) - (get_tracer_providers ()) - then - Xapi_stdext_threads.Threadext.Mutex.execute Spans.lock (fun () -> - Hashtbl.clear Spans.spans ; - Hashtbl.clear Spans.finished_spans - ) + Hashtbl.replace tracer_providers uuid provider ; + if + List.for_all + (fun provider -> not provider.TracerProvider.enabled) + (get_tracer_providers_unlocked ()) + then + Xapi_stdext_threads.Threadext.Mutex.execute Spans.lock (fun () -> + Hashtbl.clear Spans.spans ; + Hashtbl.clear Spans.finished_spans + ) + ) let create ~enabled ~attributes ~endpoints ~name_label ~uuid = let endpoints = List.map endpoint_of_string endpoints in From 75131fb2639fcc315da5f83d648ceedf5bbc47bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Mon, 29 Apr 2024 14:41:22 +0100 Subject: [PATCH 2/6] tracing: replace global ref with Atomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation for OCaml 5, on OCaml 4 they'd be equivalent. Note that adding Atomic doesn't make operations on these values always atomic: that is the responsibility of surrounding code. E.g. Atomic.get + Atomic.set is not atomic, because another domain might've raced and changed the value inbetween (so in that case Atomic.compare_and_set should be used). However for global flags that are read multiple times, but set from a central place this isn't a problem. Signed-off-by: Edwin Török --- ocaml/libs/tracing/tracing.ml | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/ocaml/libs/tracing/tracing.ml b/ocaml/libs/tracing/tracing.ml index 7d8218d393b..759fd7a2068 100644 --- a/ocaml/libs/tracing/tracing.ml +++ b/ocaml/libs/tracing/tracing.ml @@ -84,9 +84,9 @@ let validate_attribute (key, value) = && Re.execp attribute_key_regex key && W3CBaggage.Key.is_valid_key key -let observe = ref true +let observe = Atomic.make true -let set_observe mode = observe := mode +let set_observe mode = Atomic.set observe mode module SpanKind = struct type t = Server | Consumer | Client | Producer | Internal [@@deriving rpcty] @@ -301,13 +301,13 @@ module Spans = struct Hashtbl.length spans ) - let max_spans = ref 1000 + let max_spans = Atomic.make 1000 - let set_max_spans x = max_spans := x + let set_max_spans x = Atomic.set max_spans x - let max_traces = ref 1000 + let max_traces = Atomic.make 1000 - let set_max_traces x = max_traces := x + let set_max_traces x = Atomic.set max_traces x let finished_spans = Hashtbl.create 100 @@ -326,13 +326,13 @@ module Spans = struct Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> match Hashtbl.find_opt spans key with | None -> - if Hashtbl.length spans < !max_traces then + if Hashtbl.length spans < Atomic.get max_traces then Hashtbl.add spans key [span] else debug "%s exceeded max traces when adding to span table" __FUNCTION__ | Some span_list -> - if List.length span_list < !max_spans then + if List.length span_list < Atomic.get max_spans then Hashtbl.replace spans key (span :: span_list) else debug "%s exceeded max traces when adding to span table" @@ -363,13 +363,13 @@ module Spans = struct Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> match Hashtbl.find_opt finished_spans key with | None -> - if Hashtbl.length finished_spans < !max_traces then + if Hashtbl.length finished_spans < Atomic.get max_traces then Hashtbl.add finished_spans key [span] else debug "%s exceeded max traces when adding to finished span table" __FUNCTION__ | Some span_list -> - if List.length span_list < !max_spans then + if List.length span_list < Atomic.get max_spans then Hashtbl.replace finished_spans key (span :: span_list) else debug "%s exceeded max traces when adding to finished span table" @@ -407,7 +407,7 @@ module Spans = struct module GC = struct let lock = Mutex.create () - let span_timeout = ref 86400. + let span_timeout = Atomic.make 86400. let span_timeout_thread = ref None @@ -421,7 +421,7 @@ module Spans = struct let elapsed = Unix.gettimeofday () -. span.Span.begin_time in - if elapsed > !span_timeout *. 1000000. then ( + if elapsed > Atomic.get span_timeout *. 1000000. then ( debug "Tracing: Span %s timed out, forcibly finishing now" span.Span.context.span_id ; let span = @@ -444,14 +444,14 @@ module Spans = struct ) let initialise_thread ~timeout = - span_timeout := timeout ; + Atomic.set span_timeout timeout ; span_timeout_thread := Some (Thread.create (fun () -> while true do debug "Tracing: Span garbage collector" ; - Thread.delay !span_timeout ; + Thread.delay (Atomic.get span_timeout) ; gc_inactive_spans () done ) @@ -631,7 +631,7 @@ let enable_span_garbage_collector ?(timeout = 86400.) () = Spans.GC.initialise_thread ~timeout let with_tracing ?(attributes = []) ?(parent = None) ~name f = - if not !observe then + if not (Atomic.get observe) then f None else let tracer = get_tracer ~name in From 7a350e6bee64d274297b607d0d60def16deb2a68 Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Tue, 23 Apr 2024 15:51:08 +0100 Subject: [PATCH 3/6] CP-48195: Set `Tracing.observe` default to `false` Sets `observe` to `false` until at least one tracer provider is enabled. If all tracer providers are disabled we set it back to `false`. Prior to this `observe` seemed to be always `true` (apart from unit tests). This would cause `with_tracing` to spam the logs with warnings `No provider found...` until at least one tracer provider is enabled. By setting `observe` to `false` as default and updating it depending on the state of the tracer providers, `with_tracing` should now execute no extra operations. Therefore, we avoid spamming the logs unnecessarily. Signed-off-by: Gabriel Buica --- ocaml/libs/tracing/tracing.ml | 86 ++++++++++++++++++++-------------- ocaml/libs/tracing/tracing.mli | 2 +- ocaml/tests/test_observer.ml | 31 +++++++++--- 3 files changed, 76 insertions(+), 43 deletions(-) diff --git a/ocaml/libs/tracing/tracing.ml b/ocaml/libs/tracing/tracing.ml index 759fd7a2068..03270ec9cc0 100644 --- a/ocaml/libs/tracing/tracing.ml +++ b/ocaml/libs/tracing/tracing.ml @@ -16,6 +16,8 @@ module D = Debug.Make (struct let name = "tracing" end) module Delay = Xapi_stdext_threads.Threadext.Delay open D +let fail fmt = Printf.ksprintf failwith fmt + module W3CBaggage = struct module Key = struct let is_valid_key str = @@ -84,10 +86,12 @@ let validate_attribute (key, value) = && Re.execp attribute_key_regex key && W3CBaggage.Key.is_valid_key key -let observe = Atomic.make true +let observe = Atomic.make false let set_observe mode = Atomic.set observe mode +let get_observe () = Atomic.get observe + module SpanKind = struct type t = Server | Consumer | Client | Producer | Internal [@@deriving rpcty] @@ -388,7 +392,7 @@ module Spans = struct | None -> false | Some span_list -> - List.exists (fun x -> x = span) span_list + List.mem span span_list ) (** since copies the existing finished spans and then clears the existing spans as to only export them once *) @@ -558,45 +562,51 @@ let get_tracer_providers () = Xapi_stdext_threads.Threadext.Mutex.execute lock get_tracer_providers_unlocked let set ?enabled ?attributes ?endpoints ~uuid () = + let update_provider (provider : TracerProvider.t) enabled attributes endpoints + = + let enabled = Option.value ~default:provider.enabled enabled in + let attributes : string Attributes.t = + Option.fold ~none:provider.attributes ~some:Attributes.of_list attributes + in + let endpoints = + Option.fold ~none:provider.endpoints + ~some:(List.map endpoint_of_string) + endpoints + in + {provider with enabled; attributes; endpoints} + in + Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> let provider = match Hashtbl.find_opt tracer_providers uuid with | Some (provider : TracerProvider.t) -> - let enabled = Option.value ~default:provider.enabled enabled in - let attributes : string Attributes.t = - Option.fold ~none:provider.attributes ~some:Attributes.of_list - attributes - in - let endpoints = - Option.fold ~none:provider.endpoints - ~some:(List.map endpoint_of_string) - endpoints - in - {provider with enabled; attributes; endpoints} + update_provider provider enabled attributes endpoints | None -> - failwith - (Printf.sprintf "The TracerProvider : %s does not exist" uuid) + fail "The TracerProvider : %s does not exist" uuid in Hashtbl.replace tracer_providers uuid provider ; if List.for_all (fun provider -> not provider.TracerProvider.enabled) (get_tracer_providers_unlocked ()) - then + then ( + set_observe false ; Xapi_stdext_threads.Threadext.Mutex.execute Spans.lock (fun () -> Hashtbl.clear Spans.spans ; Hashtbl.clear Spans.finished_spans ) + ) else + set_observe true ) let create ~enabled ~attributes ~endpoints ~name_label ~uuid = - let endpoints = List.map endpoint_of_string endpoints in - let attributes = Attributes.of_list attributes in let provider : TracerProvider.t = + let endpoints = List.map endpoint_of_string endpoints in + let attributes = Attributes.of_list attributes in {name_label; attributes; endpoints; enabled} in Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> - match Hashtbl.find_opt tracer_providers uuid with + ( match Hashtbl.find_opt tracer_providers uuid with | None -> Hashtbl.add tracer_providers uuid provider | Some _ -> @@ -605,35 +615,37 @@ let create ~enabled ~attributes ~endpoints ~name_label ~uuid = handy to not change the control flow since calls like cluster_pool_resync might not be aware that a TracerProvider has already been created.*) error "Tracing : TracerProvider %s already exists" name_label + ) ; + if enabled then set_observe true ) let destroy ~uuid = Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> - Hashtbl.remove tracer_providers uuid + let _ = Hashtbl.remove tracer_providers uuid in + if Hashtbl.length tracer_providers = 0 then set_observe false else () ) let get_tracer ~name = - let providers = - Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> - Hashtbl.fold (fun _k v acc -> v :: acc) tracer_providers [] - ) - in - match - List.find_opt (fun provider -> provider.TracerProvider.enabled) providers - with - | Some provider -> - Tracer.create ~name ~provider - | None -> - (* warn "No provider found for tracing %s" name ; *) - Tracer.no_op + if Atomic.get observe then ( + let providers = + Xapi_stdext_threads.Threadext.Mutex.execute lock + get_tracer_providers_unlocked + in + + match List.find_opt TracerProvider.get_enabled providers with + | Some provider -> + Tracer.create ~name ~provider + | None -> + warn "No provider found for tracing %s" name ; + Tracer.no_op + ) else + Tracer.no_op let enable_span_garbage_collector ?(timeout = 86400.) () = Spans.GC.initialise_thread ~timeout let with_tracing ?(attributes = []) ?(parent = None) ~name f = - if not (Atomic.get observe) then - f None - else + if Atomic.get observe then ( let tracer = get_tracer ~name in match Tracer.start ~tracer ~attributes ~name ~parent () with | Ok span -> ( @@ -650,6 +662,8 @@ let with_tracing ?(attributes = []) ?(parent = None) ~name f = | Error e -> warn "Failed to start tracing: %s" (Printexc.to_string e) ; f None + ) else + f None module EnvHelpers = struct let traceparent_key = "TRACEPARENT" diff --git a/ocaml/libs/tracing/tracing.mli b/ocaml/libs/tracing/tracing.mli index b7f33b6d051..c67c5b2823b 100644 --- a/ocaml/libs/tracing/tracing.mli +++ b/ocaml/libs/tracing/tracing.mli @@ -174,7 +174,7 @@ val with_tracing : -> (Span.t option -> 'a) -> 'a -val set_observe : bool -> unit +val get_observe : unit -> bool val validate_attribute : string * string -> bool diff --git a/ocaml/tests/test_observer.ml b/ocaml/tests/test_observer.ml index a6b943741b1..3e8d88417be 100644 --- a/ocaml/tests/test_observer.ml +++ b/ocaml/tests/test_observer.ml @@ -40,8 +40,7 @@ let trace_log_dir ?(test_name = "") () = let () = Destination.File.set_trace_log_dir (trace_log_dir ()) ; - set_service_name "unit_tests" ; - set_observe false + set_service_name "unit_tests" module Xapi_DB = struct let assert_num_observers ~__context x = @@ -405,6 +404,13 @@ let test_hashtbl_leaks () = let test_trace_log_dir = trace_log_dir ~test_name:"test_hashtbl_leaks" () in let __context = Test_common.make_test_database () in let self = test_create ~__context ~enabled:true () in + let filter_export_spans span = + match Span.get_name span with + | "Tracing.flush_spans" | "Tracing.File.export" | "Tracing.Http.export" -> + false + | _ -> + true + in let span = start_test_span () in ( match span with | Ok x -> @@ -424,10 +430,23 @@ let test_hashtbl_leaks () = false ; Destination.flush_spans () ; - Alcotest.(check bool) - "Span export clears finished_spans hashtable" - (Tracer.finished_span_hashtbl_is_empty ()) - true + + (* Flushing the spans always creates two spans if there are tracer providers enabled. + - Tracing.flush_spans; + - Tracing.File.export/Tracing.Http.export. + + Therefore, the finished spans table is not always empty after flushing. + *) + let _, finished_spans = Spans.dump () in + let filtered_spans_count = + finished_spans + |> Hashtbl.to_seq_values + |> Seq.concat_map List.to_seq + |> Seq.filter filter_export_spans + |> Seq.length + in + Alcotest.(check int) + "Span export clears finished_spans hash table" filtered_spans_count 0 | Error e -> Alcotest.failf "Span start failed with %s" (Printexc.to_string e) ) ; From 0a3b936e5d1b2ad2861c6d20ad76a15eca2d6024 Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Thu, 2 May 2024 11:01:23 +0100 Subject: [PATCH 4/6] CP-48195: Add unit tests for `tracing` library. Create a new unit test file `test_tracing.ml` for the `tracing` library. Add tests for `create`/`set`/`destroy` tracer providers. Now that we change the library to have `observe` modes based on whether or not we have `tracer providers` enabled, we want to make sure that functions on applied on tracer providers set the correct mode for the library. Signed-off-by: Gabriel Buica --- ocaml/libs/tracing/dune | 5 + ocaml/libs/tracing/test_tracing.ml | 213 ++++++++++++++++++++++++++++ ocaml/libs/tracing/test_tracing.mli | 0 ocaml/tests/test_observer.ml | 4 +- 4 files changed, 220 insertions(+), 2 deletions(-) create mode 100644 ocaml/libs/tracing/test_tracing.ml create mode 100644 ocaml/libs/tracing/test_tracing.mli diff --git a/ocaml/libs/tracing/dune b/ocaml/libs/tracing/dune index 0e1160818c2..743b5418f69 100644 --- a/ocaml/libs/tracing/dune +++ b/ocaml/libs/tracing/dune @@ -24,3 +24,8 @@ zstd) (preprocess (pps ppx_deriving_rpc))) + +(test + (name test_tracing) + (modules test_tracing) + (libraries tracing alcotest uuid)) diff --git a/ocaml/libs/tracing/test_tracing.ml b/ocaml/libs/tracing/test_tracing.ml new file mode 100644 index 00000000000..8c10b011374 --- /dev/null +++ b/ocaml/libs/tracing/test_tracing.ml @@ -0,0 +1,213 @@ +module D = Debug.Make (struct let name = "test_tracing" end) + +let attr_to_string (key, value) = Printf.sprintf "%s:%s" key value + +let attr_compare attr1 attr2 = + String.compare (attr_to_string attr1) (attr_to_string attr2) + +let attr_testable = + let pp = Fmt.of_to_string attr_to_string in + + Alcotest.testable pp (fun (k1, v1) (k2, v2) -> k1 = k2 && v1 = v2) + +let assert_provider_enabled provider flag = + Alcotest.(check bool) + "Check state of trace provider" flag + (Tracing.TracerProvider.get_enabled provider) + +let assert_provider_attrs provider attributes = + Alcotest.(check (slist attr_testable attr_compare)) + "Check attributes of trace provider" attributes + (Tracing.TracerProvider.get_attributes provider) + +let assert_provider_name_label provider label = + Alcotest.(check string) + "Check label of trace provider" label + (Tracing.TracerProvider.get_name_label provider) + +let assert_provider_endpoints provider endpoints = + Alcotest.(check (list string)) + "Check endpoints of trace provider" endpoints + (provider + |> Tracing.TracerProvider.get_endpoints + |> List.map Tracing.endpoint_to_string + ) + +let assert_observe_mode flag = + Alcotest.(check bool) + "Check observe mode of library" flag (Tracing.get_observe ()) + +let uuid1, uuid2, uuid3 = + Uuidx.(to_string (make ()), to_string (make ()), to_string (make ())) + +let http_endpoint = "http://example.com:9411/api/v2/spans" + +let with_observe_mode_check flag f = f () ; assert_observe_mode flag + +let get_provider name_label = + let providers = + Tracing.get_tracer_providers () + |> List.filter (fun provider -> + String.equal + (Tracing.TracerProvider.get_name_label provider) + name_label + ) + in + match providers with + | [provider] -> + provider + | _ -> + Alcotest.failf "expected only one provider" + +let create_with (enabled, attributes, endpoints, name_label, uuid) = + let () = Tracing.create ~enabled ~attributes ~endpoints ~name_label ~uuid in + get_provider name_label + +let test_destroy_all_providers uuids = + let () = List.iter (fun uuid -> Tracing.destroy ~uuid) uuids in + assert_observe_mode false + +let test_create_and_destroy () = + let test_create_with (enabled, attributes, endpoints, name_label, uuid) = + let provider = + create_with (enabled, attributes, endpoints, name_label, uuid) + in + assert_provider_enabled provider enabled ; + assert_provider_attrs provider attributes ; + assert_provider_endpoints provider endpoints ; + assert_provider_name_label provider name_label + in + + let uuids = [uuid1; uuid2; uuid3] in + + let provider_confs_enable_observe = + [ + ( true + , [("enabled", "true")] + , [Tracing.bugtool_name] + , "dummy_test_provider_1" + , uuid1 + ) + ; ( false + , [] + , [Tracing.bugtool_name; http_endpoint] + , "dummy_test_provider_2" + , uuid2 + ) + ; ( false + , [("enabled", "false"); ("is_test", "true")] + , [http_endpoint] + , "dummy_test_provider_3" + , uuid3 + ) + ] + in + let provider_confs_disable_observe = + [ + ( false + , [("enabled", "false")] + , [Tracing.bugtool_name] + , "dummy_test_provider_1" + , uuid1 + ) + ; ( false + , [] + , [Tracing.bugtool_name; http_endpoint] + , "dummy_test_provider_2" + , uuid2 + ) + ; ( false + , [("enabled", "false"); ("is_test", "true")] + , [http_endpoint] + , "dummy_test_provider_3" + , uuid3 + ) + ] + in + + (* We start with no tracer providers, therefore, we expect the observe mode to + be disbled. *) + assert_observe_mode false ; + + let test_provider_conf conf expected_mode_state = + with_observe_mode_check expected_mode_state (fun () -> + List.iter test_create_with conf + ) ; + test_destroy_all_providers uuids ; + with_observe_mode_check expected_mode_state (fun () -> + List.iter test_create_with (List.rev conf) + ) ; + test_destroy_all_providers uuids + in + + test_provider_conf provider_confs_enable_observe true ; + test_provider_conf provider_confs_disable_observe false + +let test_set_tracer_provider () = + let test_set_with provider (enabled, attributes, endpoints, uuid) = + Tracing.set ~enabled ~attributes ~endpoints ~uuid () ; + let updated_provider = + provider |> Tracing.TracerProvider.get_name_label |> get_provider + in + assert_provider_enabled updated_provider enabled ; + assert_provider_attrs updated_provider attributes ; + assert_provider_endpoints updated_provider endpoints + in + + let provider1 = + create_with + (false, [], [Tracing.bugtool_name], "dummy_test_provider_1", uuid1) + in + + let provider2 = + create_with + (false, [], [Tracing.bugtool_name], "dummy_test_provider_2", uuid2) + in + + let new_provider1_confs = + ( ( true + , [("test_set", "true")] + , [Tracing.bugtool_name; http_endpoint] + , uuid1 + ) + , (false, [], [Tracing.bugtool_name; http_endpoint], uuid1) + ) + in + + let new_provider2_confs = + ( ( true + , [("test_set", "true"); ("dummy_key", "dummy_value")] + , [Tracing.bugtool_name; http_endpoint] + , uuid2 + ) + , (false, [], [Tracing.bugtool_name; http_endpoint], uuid2) + ) + in + + assert_observe_mode false ; + + with_observe_mode_check true (fun () -> + test_set_with provider1 (fst new_provider1_confs) + ) ; + + with_observe_mode_check true (fun () -> + test_set_with provider2 (fst new_provider2_confs) + ) ; + with_observe_mode_check true (fun () -> + test_set_with provider1 (snd new_provider1_confs) + ) ; + + with_observe_mode_check false (fun () -> + test_set_with provider2 (snd new_provider2_confs) + ) ; + + test_destroy_all_providers [uuid1; uuid2] ; + assert_observe_mode false + +let test = + [ + ("Create and destroy tracer providers", `Quick, test_create_and_destroy) + ; ("Set tracer provider", `Quick, test_set_tracer_provider) + ] + +let () = Alcotest.run "Tracing library" [("trace providers", test)] diff --git a/ocaml/libs/tracing/test_tracing.mli b/ocaml/libs/tracing/test_tracing.mli new file mode 100644 index 00000000000..e69de29bb2d diff --git a/ocaml/tests/test_observer.ml b/ocaml/tests/test_observer.ml index 3e8d88417be..05112f0bb80 100644 --- a/ocaml/tests/test_observer.ml +++ b/ocaml/tests/test_observer.ml @@ -405,8 +405,8 @@ let test_hashtbl_leaks () = let __context = Test_common.make_test_database () in let self = test_create ~__context ~enabled:true () in let filter_export_spans span = - match Span.get_name span with - | "Tracing.flush_spans" | "Tracing.File.export" | "Tracing.Http.export" -> + match String.lowercase_ascii (Span.get_name span) with + | "tracing.flush_spans" | "tracing.file.export" | "tracing.http.export" -> false | _ -> true From a0c84e5261b5dc116177bb5277606666b1330259 Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Tue, 30 Apr 2024 15:23:28 +0100 Subject: [PATCH 5/6] CP-48195: Remove code duplication. Removes code duplication in `storage_mux.ml` by using the already existing `with_dbg` implementation from `debuginfo` module. This should lower the chances of unintentionally introducing bugs by having to maintain two version of the same functions. e.g. Not using the no op when tracing is disabled and generating unwanted warning messages. Signed-off-by: Gabriel Buica --- ocaml/xapi/storage_mux.ml | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/ocaml/xapi/storage_mux.ml b/ocaml/xapi/storage_mux.ml index 0931b4b0903..0dcef1d201f 100644 --- a/ocaml/xapi/storage_mux.ml +++ b/ocaml/xapi/storage_mux.ml @@ -18,26 +18,8 @@ module D = Debug.Make (struct let name = "mux" end) open D -(* Sets the logging context based on `dbg`. - Also adds a new tracing span, linked to the parent span from `dbg`, if available. *) let with_dbg ~name ~dbg f = - let open Debug_info in - let di = of_string dbg in - Debug.with_thread_associated di.log - (fun () -> - let name = "SMAPIv2." ^ name in - let tracer = Tracing.get_tracer ~name in - let span = Tracing.Tracer.start ~tracer ~name ~parent:di.tracing () in - match span with - | Ok span_context -> - let result = f {di with tracing= span_context} in - let _ = Tracing.Tracer.finish span_context in - result - | Error e -> - D.warn "Failed to start tracing: %s" (Printexc.to_string e) ; - f di - ) - () + Debug_info.with_dbg ~with_thread:true ~module_name:"SMAPIv2" ~name ~dbg f type processor = Rpc.call -> Rpc.response From 43e26f3454f1ffdaab349fa244dbc1210a214afa Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Thu, 2 May 2024 14:06:51 +0100 Subject: [PATCH 6/6] CP-48195: Tracing -- Move `create`\`set`\`destroy`\... Moves the following: - `create` under `TracerProvider`; - `set` under `TracerProvider`; - `destroy` under `TracerProvider; - `get_tracer_providers` unde `TracerProvider`; - `get_tracer` under `Tracer`. Adds documentation for `TracerProvider` module. It kept being confusing of what `Tracing.set` does unless I was going through the implementation again and again. Therefore, I moved some of the functions so that their functionality becomes (hopefully) more intuitive. Signed-off-by: Gabriel Buica --- ocaml/libs/tracing/test_tracing.ml | 11 +- ocaml/libs/tracing/tracing.ml | 184 ++++++++++++++------------- ocaml/libs/tracing/tracing.mli | 67 ++++++---- ocaml/libs/tracing/tracing_export.ml | 2 +- ocaml/tests/test_observer.ml | 8 +- ocaml/xapi/context.ml | 2 +- ocaml/xapi/xapi_observer.ml | 11 +- ocaml/xenopsd/lib/xenops_server.ml | 15 ++- 8 files changed, 166 insertions(+), 134 deletions(-) diff --git a/ocaml/libs/tracing/test_tracing.ml b/ocaml/libs/tracing/test_tracing.ml index 8c10b011374..3d8935638e5 100644 --- a/ocaml/libs/tracing/test_tracing.ml +++ b/ocaml/libs/tracing/test_tracing.ml @@ -46,7 +46,7 @@ let with_observe_mode_check flag f = f () ; assert_observe_mode flag let get_provider name_label = let providers = - Tracing.get_tracer_providers () + Tracing.TracerProvider.get_tracer_providers () |> List.filter (fun provider -> String.equal (Tracing.TracerProvider.get_name_label provider) @@ -60,11 +60,14 @@ let get_provider name_label = Alcotest.failf "expected only one provider" let create_with (enabled, attributes, endpoints, name_label, uuid) = - let () = Tracing.create ~enabled ~attributes ~endpoints ~name_label ~uuid in + let () = + Tracing.TracerProvider.create ~enabled ~attributes ~endpoints ~name_label + ~uuid + in get_provider name_label let test_destroy_all_providers uuids = - let () = List.iter (fun uuid -> Tracing.destroy ~uuid) uuids in + let () = List.iter (fun uuid -> Tracing.TracerProvider.destroy ~uuid) uuids in assert_observe_mode false let test_create_and_destroy () = @@ -145,7 +148,7 @@ let test_create_and_destroy () = let test_set_tracer_provider () = let test_set_with provider (enabled, attributes, endpoints, uuid) = - Tracing.set ~enabled ~attributes ~endpoints ~uuid () ; + Tracing.TracerProvider.set ~enabled ~attributes ~endpoints ~uuid () ; let updated_provider = provider |> Tracing.TracerProvider.get_name_label |> get_provider in diff --git a/ocaml/libs/tracing/tracing.ml b/ocaml/libs/tracing/tracing.ml index 03270ec9cc0..b0b20c49672 100644 --- a/ocaml/libs/tracing/tracing.ml +++ b/ocaml/libs/tracing/tracing.ml @@ -412,6 +412,7 @@ module Spans = struct let lock = Mutex.create () let span_timeout = Atomic.make 86400. + (* one day in seconds *) let span_timeout_thread = ref None @@ -479,6 +480,81 @@ module TracerProvider = struct let get_endpoints t = t.endpoints let get_enabled t = t.enabled + + let lock = Mutex.create () + + let tracer_providers = Hashtbl.create 100 + + let create ~enabled ~attributes ~endpoints ~name_label ~uuid = + let provider : t = + let endpoints = List.map endpoint_of_string endpoints in + let attributes = Attributes.of_list attributes in + {name_label; attributes; endpoints; enabled} + in + Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> + ( match Hashtbl.find_opt tracer_providers uuid with + | None -> + Hashtbl.add tracer_providers uuid provider + | Some _ -> + (* CP-45469: It is ok not to have an exception here since it is unlikely that the + user has caused the issue, so no need to propagate back. It is also + handy to not change the control flow since calls like cluster_pool_resync + might not be aware that a TracerProvider has already been created.*) + error "Tracing : TracerProvider %s already exists" name_label + ) ; + if enabled then set_observe true + ) + + let get_tracer_providers_unlocked () = + Hashtbl.fold (fun _ provider acc -> provider :: acc) tracer_providers [] + + let get_tracer_providers () = + Xapi_stdext_threads.Threadext.Mutex.execute lock + get_tracer_providers_unlocked + + let set ?enabled ?attributes ?endpoints ~uuid () = + let update_provider (provider : t) enabled attributes endpoints = + let enabled = Option.value ~default:provider.enabled enabled in + let attributes : string Attributes.t = + Option.fold ~none:provider.attributes ~some:Attributes.of_list + attributes + in + let endpoints = + Option.fold ~none:provider.endpoints + ~some:(List.map endpoint_of_string) + endpoints + in + {provider with enabled; attributes; endpoints} + in + + Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> + let provider = + match Hashtbl.find_opt tracer_providers uuid with + | Some (provider : t) -> + update_provider provider enabled attributes endpoints + | None -> + fail "The TracerProvider : %s does not exist" uuid + in + Hashtbl.replace tracer_providers uuid provider ; + if + List.for_all + (fun provider -> not provider.enabled) + (get_tracer_providers_unlocked ()) + then ( + set_observe false ; + Xapi_stdext_threads.Threadext.Mutex.execute Spans.lock (fun () -> + Hashtbl.clear Spans.spans ; + Hashtbl.clear Spans.finished_spans + ) + ) else + set_observe true + ) + + let destroy ~uuid = + Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> + let _ = Hashtbl.remove tracer_providers uuid in + if Hashtbl.length tracer_providers = 0 then set_observe false else () + ) end module Tracer = struct @@ -497,6 +573,22 @@ module Tracer = struct in {name= ""; provider} + let get_tracer ~name = + if Atomic.get observe then ( + let providers = + Xapi_stdext_threads.Threadext.Mutex.execute TracerProvider.lock + TracerProvider.get_tracer_providers_unlocked + in + + match List.find_opt TracerProvider.get_enabled providers with + | Some provider -> + create ~name ~provider + | None -> + warn "No provider found for tracing %s" name ; + no_op + ) else + no_op + let span_of_span_context context name : Span.t = { context @@ -551,102 +643,12 @@ module Tracer = struct Spans.finished_span_hashtbl_is_empty () end -let lock = Mutex.create () - -let tracer_providers = Hashtbl.create 100 - -let get_tracer_providers_unlocked () = - Hashtbl.fold (fun _ provider acc -> provider :: acc) tracer_providers [] - -let get_tracer_providers () = - Xapi_stdext_threads.Threadext.Mutex.execute lock get_tracer_providers_unlocked - -let set ?enabled ?attributes ?endpoints ~uuid () = - let update_provider (provider : TracerProvider.t) enabled attributes endpoints - = - let enabled = Option.value ~default:provider.enabled enabled in - let attributes : string Attributes.t = - Option.fold ~none:provider.attributes ~some:Attributes.of_list attributes - in - let endpoints = - Option.fold ~none:provider.endpoints - ~some:(List.map endpoint_of_string) - endpoints - in - {provider with enabled; attributes; endpoints} - in - - Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> - let provider = - match Hashtbl.find_opt tracer_providers uuid with - | Some (provider : TracerProvider.t) -> - update_provider provider enabled attributes endpoints - | None -> - fail "The TracerProvider : %s does not exist" uuid - in - Hashtbl.replace tracer_providers uuid provider ; - if - List.for_all - (fun provider -> not provider.TracerProvider.enabled) - (get_tracer_providers_unlocked ()) - then ( - set_observe false ; - Xapi_stdext_threads.Threadext.Mutex.execute Spans.lock (fun () -> - Hashtbl.clear Spans.spans ; - Hashtbl.clear Spans.finished_spans - ) - ) else - set_observe true - ) - -let create ~enabled ~attributes ~endpoints ~name_label ~uuid = - let provider : TracerProvider.t = - let endpoints = List.map endpoint_of_string endpoints in - let attributes = Attributes.of_list attributes in - {name_label; attributes; endpoints; enabled} - in - Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> - ( match Hashtbl.find_opt tracer_providers uuid with - | None -> - Hashtbl.add tracer_providers uuid provider - | Some _ -> - (* CP-45469: It is ok not to have an exception here since it is unlikely that the - user has caused the issue, so no need to propagate back. It is also - handy to not change the control flow since calls like cluster_pool_resync - might not be aware that a TracerProvider has already been created.*) - error "Tracing : TracerProvider %s already exists" name_label - ) ; - if enabled then set_observe true - ) - -let destroy ~uuid = - Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> - let _ = Hashtbl.remove tracer_providers uuid in - if Hashtbl.length tracer_providers = 0 then set_observe false else () - ) - -let get_tracer ~name = - if Atomic.get observe then ( - let providers = - Xapi_stdext_threads.Threadext.Mutex.execute lock - get_tracer_providers_unlocked - in - - match List.find_opt TracerProvider.get_enabled providers with - | Some provider -> - Tracer.create ~name ~provider - | None -> - warn "No provider found for tracing %s" name ; - Tracer.no_op - ) else - Tracer.no_op - let enable_span_garbage_collector ?(timeout = 86400.) () = Spans.GC.initialise_thread ~timeout let with_tracing ?(attributes = []) ?(parent = None) ~name f = if Atomic.get observe then ( - let tracer = get_tracer ~name in + let tracer = Tracer.get_tracer ~name in match Tracer.start ~tracer ~attributes ~name ~parent () with | Ok span -> ( try diff --git a/ocaml/libs/tracing/tracing.mli b/ocaml/libs/tracing/tracing.mli index c67c5b2823b..b0c47b3062b 100644 --- a/ocaml/libs/tracing/tracing.mli +++ b/ocaml/libs/tracing/tracing.mli @@ -110,6 +110,8 @@ end module Tracer : sig type t + val get_tracer : name:string -> t + val span_of_span_context : SpanContext.t -> string -> Span.t val start : @@ -131,40 +133,61 @@ module Tracer : sig val finished_span_hashtbl_is_empty : unit -> bool end +(** [TracerProvider] module provides ways to intereact with the tracer providers. + *) module TracerProvider : sig + (** Type that represents a tracer provider.*) type t + val create : + enabled:bool + -> attributes:(string * string) list + -> endpoints:string list + -> name_label:string + -> uuid:string + -> unit + (** [create ~enabled ~attributes ~endpoints ~name_label ~uuid] initializes a + tracer provider based on the following parameters: [enabled], [attributes], + [endpoints], [name_label], and [uuid]. *) + + val set : + ?enabled:bool + -> ?attributes:(string * string) list + -> ?endpoints:string list + -> uuid:string + -> unit + -> unit + (** [set ?enabled ?attributes ?endpoints ~uuid ()] updates the tracer provider + identified by the given [uuid] with the new configuration paremeters: + [enabled], [attributes], and [endpoints]. + + If any of the configuration parameters are + missing, the old ones are kept. + + Raises [Failure] if there are no tracer provider with the given [uuid]. + *) + + val destroy : uuid:string -> unit + (** [destroy ~uuid] destroys the tracer provider with the given [uuid]. + If there are no tracer provider with the given [uuid], it does nothing. + *) + + val get_tracer_providers : unit -> t list + (** [get_tracer_providers] returns a list of all existing tracer providers. *) + val get_name_label : t -> string + (** [get_name_label provider] returns the name label of the [provider]. *) val get_attributes : t -> (string * string) list + (** [get_attributes provider] returns the list of attributes of the [provider]. *) val get_endpoints : t -> endpoint list + (** [get_endpoints provider] returns list of endpoints of the [provider]. *) val get_enabled : t -> bool + (** [get_name_label provider] returns whether or not the [provider] is enabled. *) end -val set : - ?enabled:bool - -> ?attributes:(string * string) list - -> ?endpoints:string list - -> uuid:string - -> unit - -> unit - -val create : - enabled:bool - -> attributes:(string * string) list - -> endpoints:string list - -> name_label:string - -> uuid:string - -> unit - -val destroy : uuid:string -> unit - -val get_tracer_providers : unit -> TracerProvider.t list - -val get_tracer : name:string -> Tracer.t - val enable_span_garbage_collector : ?timeout:float -> unit -> unit val with_tracing : diff --git a/ocaml/libs/tracing/tracing_export.ml b/ocaml/libs/tracing/tracing_export.ml index a769b2403bc..5bb154d20c2 100644 --- a/ocaml/libs/tracing/tracing_export.ml +++ b/ocaml/libs/tracing/tracing_export.ml @@ -280,7 +280,7 @@ module Destination = struct let@ parent = with_tracing ~parent:None ~attributes ~name:"Tracing.flush_spans" in - get_tracer_providers () + TracerProvider.get_tracer_providers () |> List.filter TracerProvider.get_enabled |> List.concat_map TracerProvider.get_endpoints |> List.iter (export_to_endpoint parent span_list) diff --git a/ocaml/tests/test_observer.ml b/ocaml/tests/test_observer.ml index 05112f0bb80..322c586cb20 100644 --- a/ocaml/tests/test_observer.ml +++ b/ocaml/tests/test_observer.ml @@ -62,13 +62,13 @@ end module TracerProvider = struct let assert_num_observers ~__context x = - let providers = get_tracer_providers () in + let providers = TracerProvider.get_tracer_providers () in Alcotest.(check int) (Printf.sprintf "%d provider(s) exists in lib " x) x (List.length providers) let find_provider_exn ~name = - let providers = get_tracer_providers () in + let providers = TracerProvider.get_tracer_providers () in match List.find_opt (fun x -> TracerProvider.get_name_label x = name) providers with @@ -135,12 +135,12 @@ let test_create ~__context ?(name_label = "test-observer") ?(enabled = false) () self let start_test_span () = - let tracer = get_tracer ~name:"test-observer" in + let tracer = Tracer.get_tracer ~name:"test-observer" in let span = Tracer.start ~tracer ~name:"test_task" ~parent:None () in span let start_test_trace () = - let tracer = get_tracer ~name:"test-observer" in + let tracer = Tracer.get_tracer ~name:"test-observer" in let root = Tracer.start ~tracer ~name:"test_task" ~parent:None () |> Result.value ~default:None diff --git a/ocaml/xapi/context.ml b/ocaml/xapi/context.ml index 080bab8fcad..089f04b8216 100644 --- a/ocaml/xapi/context.ml +++ b/ocaml/xapi/context.ml @@ -331,7 +331,7 @@ let start_tracing_helper ?(span_attributes = []) parent_fn task_name = let span_name, span_attributes = span_details_from_task_name task_name in let parent = parent_fn span_name in let span_kind = span_kind_of_parent parent in - let tracer = get_tracer ~name:span_name in + let tracer = Tracer.get_tracer ~name:span_name in match Tracer.start ~span_kind ~tracer ~attributes:span_attributes ~name:span_name ~parent () diff --git a/ocaml/xapi/xapi_observer.ml b/ocaml/xapi/xapi_observer.ml index b282f76bfe0..2c1fcd81312 100644 --- a/ocaml/xapi/xapi_observer.ml +++ b/ocaml/xapi/xapi_observer.ml @@ -62,23 +62,24 @@ end module Observer : ObserverInterface = struct let create ~__context ~uuid ~name_label ~attributes ~endpoints ~enabled = debug "Observer.create %s" uuid ; - Tracing.create ~uuid ~name_label ~attributes ~endpoints ~enabled + Tracing.TracerProvider.create ~uuid ~name_label ~attributes ~endpoints + ~enabled let destroy ~__context ~uuid = debug "Observer.destroy %s" uuid ; - Tracing.destroy ~uuid + Tracing.TracerProvider.destroy ~uuid let set_enabled ~__context ~uuid ~enabled = debug "Observer.set_enabled %s" uuid ; - Tracing.set ~uuid ~enabled () + Tracing.TracerProvider.set ~uuid ~enabled () let set_attributes ~__context ~uuid ~attributes = debug "Observer.set_attributes %s" uuid ; - Tracing.set ~uuid ~attributes () + Tracing.TracerProvider.set ~uuid ~attributes () let set_endpoints ~__context ~uuid ~endpoints = debug "Observer.set_endpoints %s" uuid ; - Tracing.set ~uuid ~endpoints () + Tracing.TracerProvider.set ~uuid ~endpoints () let init ~__context = debug "Observer.init" ; diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 6177c0f4b64..30fc7ea16ac 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -1822,7 +1822,7 @@ let rec atomics_of_operation = function let with_tracing ~name ~task f = let open Tracing in let parent = Xenops_task.tracing task in - let tracer = get_tracer ~name in + let tracer = Tracer.get_tracer ~name in match Tracer.start ~tracer ~name ~parent () with | Ok span -> ( Xenops_task.set_tracing task span ; @@ -4021,30 +4021,33 @@ module Observer = struct debug "Observer.create : dbg=%s" dbg ; Debug.with_thread_associated dbg (fun () -> - Tracing.create ~uuid ~name_label ~attributes ~endpoints ~enabled + Tracing.TracerProvider.create ~uuid ~name_label ~attributes ~endpoints + ~enabled ) () let destroy _ dbg uuid = debug "Observer.destroy : dbg=%s" dbg ; - Debug.with_thread_associated dbg (fun () -> Tracing.destroy ~uuid) () + Debug.with_thread_associated dbg + (fun () -> Tracing.TracerProvider.destroy ~uuid) + () let set_enabled _ dbg uuid enabled = debug "Observer.set_enabled : dbg=%s" dbg ; Debug.with_thread_associated dbg - (fun () -> Tracing.set ~uuid ~enabled ()) + (fun () -> Tracing.TracerProvider.set ~uuid ~enabled ()) () let set_attributes _ dbg uuid attributes = debug "Observer.set_attributes : dbg=%s" dbg ; Debug.with_thread_associated dbg - (fun () -> Tracing.set ~uuid ~attributes ()) + (fun () -> Tracing.TracerProvider.set ~uuid ~attributes ()) () let set_endpoints _ dbg uuid endpoints = debug "Observer.set_endpoint : dbg=%s" dbg ; Debug.with_thread_associated dbg - (fun () -> Tracing.set ~uuid ~endpoints ()) + (fun () -> Tracing.TracerProvider.set ~uuid ~endpoints ()) () let init _ dbg =