-
Notifications
You must be signed in to change notification settings - Fork 292
CP-53335, topology: do not raise exception when loading invalid distance matrices (NUMA) #6249
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ecb099b
1a022d8
ad74029
a4982af
75e4c31
fc1d96f
3b08dfa
cba91b8
9badc14
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,14 +14,8 @@ | |
|
||
module D = Debug.Make (struct let name = "topology" end) | ||
|
||
open D | ||
|
||
module CPUSet = struct | ||
include Set.Make (struct | ||
type t = int | ||
|
||
let compare (x : int) (y : int) = compare x y | ||
end) | ||
include Set.Make (Int) | ||
|
||
let pp_dump = Fmt.using to_seq Fmt.(Dump.seq int) | ||
|
||
|
@@ -95,27 +89,32 @@ let seq_range a b = | |
let rec next i () = if i = b then Seq.Nil else Seq.Cons (i, next (i + 1)) in | ||
next a | ||
|
||
(** [gen_2n n] Generates all non-empty subsets of the set of [n] nodes. *) | ||
let seq_gen_2n n = | ||
let seq_filteri p s = | ||
let rec loop i s () = | ||
match s () with | ||
| Seq.Nil -> | ||
Seq.Nil | ||
| Cons (hd, s) -> | ||
if p i hd then | ||
Cons (hd, loop (i + 1) s) | ||
else | ||
loop (i + 1) s () | ||
in | ||
loop 0 s | ||
|
||
(** [seq_all_subsets n] Generates all non-empty subsets of the [nodes] set. *) | ||
let seq_all_subsets nodes = | ||
let n = Seq.length nodes in | ||
(* A node can either be present in the output or not, so use a loop [1, 2^n) | ||
and have the [i]th bit determine that. *) | ||
let of_mask i = | ||
seq_range 0 n |> Seq.filter (fun bit -> (i lsr bit) land 1 = 1) | ||
in | ||
let of_mask i = nodes |> seq_filteri (fun bit _ -> (i lsr bit) land 1 = 1) in | ||
seq_range 1 (1 lsl n) |> Seq.map of_mask | ||
|
||
(** [seq_sort ~cmp s] sorts [s] in a temporary place using [cmp]. *) | ||
let seq_sort ~cmp s = | ||
let a = Array.of_seq s in | ||
Array.fast_sort cmp a ; Array.to_seq a | ||
|
||
(** [seq_append a b] is the sequence [a] followed by [b] *) | ||
let seq_append (a : 'a Seq.t) (b : 'a Seq.t) = | ||
let rec next v () = | ||
match v () with Seq.Nil -> b () | Seq.Cons (x, xs) -> Seq.Cons (x, next xs) | ||
in | ||
next a | ||
|
||
module NUMA = struct | ||
type node = Node of int | ||
|
||
|
@@ -125,31 +124,43 @@ module NUMA = struct | |
let compare (Node a) (Node b) = compare a b | ||
end) | ||
|
||
(* -1 in 32 bits *) | ||
let unreachable_distance = 0xFFFFFFFF | ||
|
||
let self_distance = 10 | ||
|
||
(* no mutation is exposed in the interface, therefore this is immutable *) | ||
type t = { | ||
distances: int array array | ||
; cpu_to_node: node array | ||
; node_cpus: CPUSet.t array | ||
; all: CPUSet.t | ||
; node_usage: int array | ||
(** Usage across nodes is meant to be balanced when choosing candidates for a VM *) | ||
; candidates: (float * node Seq.t) Seq.t | ||
(** Sequence of all subsets of nodes and the average distance within | ||
the subset, sorted by the latter in increasing order. *) | ||
} | ||
|
||
let node_of_int i = Node i | ||
|
||
let node_distances d nodes = | ||
let dists = | ||
nodes |> Seq.flat_map (fun n1 -> nodes |> Seq.map (fun n2 -> d.(n1).(n2))) | ||
in | ||
let count, max_dist, sum_dist = | ||
Seq.fold_left | ||
(fun (count, maxv, sum) e -> (count + 1, max maxv e, sum + e)) | ||
(0, min_int, 0) dists | ||
in | ||
(* We want to minimize maximum distance first, and average distance next. | ||
When running the VM we don't know which pCPU it'll end up using, and want | ||
to limit the worst case performance. *) | ||
((max_dist, float sum_dist /. float count), nodes) | ||
if Seq.is_empty nodes then | ||
None | ||
psafont marked this conversation as resolved.
Show resolved
Hide resolved
|
||
else | ||
let dists = | ||
nodes | ||
|> Seq.flat_map (fun n1 -> nodes |> Seq.map (fun n2 -> d.(n1).(n2))) | ||
in | ||
let count, max_dist, sum_dist = | ||
Seq.fold_left | ||
(fun (count, maxv, sum) e -> (count + 1, max maxv e, sum + e)) | ||
(0, min_int, 0) dists | ||
in | ||
(* We want to minimize maximum distance first, and average distance next. | ||
When running the VM we don't know which pCPU it'll end up using, and want | ||
to limit the worst case performance. *) | ||
Some ((max_dist, float sum_dist /. float count), nodes) | ||
|
||
let dist_cmp (a1, _) (b1, _) = compare a1 b1 | ||
|
||
|
@@ -159,60 +170,96 @@ module NUMA = struct | |
[n*multiply ... n*multiply + multiply-1], except we always the add the | ||
single NUMA node combinations. *) | ||
(* make sure that single NUMA nodes are always present in the combinations *) | ||
let single_nodes = | ||
let distance_to_candidate d = (d, float d) in | ||
let valid_nodes = | ||
seq_range 0 (Array.length d) | ||
|> Seq.map (fun i -> ((10, 10.0), Seq.return i)) | ||
|> Seq.filter_map (fun i -> | ||
let self = d.(i).(i) in | ||
if self <> unreachable_distance then | ||
Some i | ||
else | ||
None | ||
) | ||
in | ||
let numa_nodes = Array.length d in | ||
let numa_nodes = Seq.length valid_nodes in | ||
let nodes = | ||
if numa_nodes > 16 then | ||
(* try just the single nodes, and give up (use all nodes otherwise) to | ||
avoid exponential running time. We could do better here, e.g. by | ||
if numa_nodes > 16 then ( | ||
(* Avoid generating too many candidates because of the exponential | ||
running time. We could do better here, e.g. by | ||
reducing the matrix *) | ||
single_nodes | ||
else | ||
numa_nodes | ||
|> seq_gen_2n | ||
|> Seq.map (node_distances d) | ||
|> seq_append single_nodes | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the single nodes were always appended just in case something goes wrong with the filtering algorithm (which was meant to be somewhat smarter than brute force, or it may evolve in the future to be somewhat smarter). As the algorithm currently looks like I agree that we don't need to append the single nodes here. Perhaps this would be a good condition to test for in the testcases, that single (reachable) nodes are always present with the expected value (unless such a test already exists). |
||
D.info | ||
"%s: More than 16 valid NUMA nodes detected, considering only \ | ||
individual nodes." | ||
__FUNCTION__ ; | ||
valid_nodes | ||
|> Seq.map (fun i -> | ||
let self = d.(i).(i) in | ||
(distance_to_candidate self, Seq.return i) | ||
) | ||
) else | ||
valid_nodes |> seq_all_subsets |> Seq.filter_map (node_distances d) | ||
in | ||
nodes | ||
|> seq_sort ~cmp:dist_cmp | ||
|> Seq.map (fun ((_, avg), nodes) -> (avg, Seq.map (fun n -> Node n) nodes)) | ||
|
||
let pp_dump_distances = Fmt.(int |> Dump.array |> Dump.array) | ||
|
||
let make ~distances ~cpu_to_node = | ||
debug "Distances: %s" (Fmt.to_to_string pp_dump_distances distances) ; | ||
debug "CPU2Node: %s" (Fmt.to_to_string Fmt.(Dump.array int) cpu_to_node) ; | ||
let ( let* ) = Option.bind in | ||
let node_cpus = Array.map (fun _ -> CPUSet.empty) distances in | ||
|
||
(* nothing can be scheduled on unreachable nodes, remove them from the | ||
node_cpus *) | ||
Array.iteri | ||
(fun i node -> node_cpus.(node) <- CPUSet.add i node_cpus.(node)) | ||
cpu_to_node ; | ||
Array.iteri | ||
(fun i row -> | ||
let d = distances.(i).(i) in | ||
if d <> 10 then | ||
invalid_arg | ||
(Printf.sprintf "NUMA distance from node to itself must be 10: %d" d) ; | ||
Array.iteri | ||
(fun _ d -> | ||
if d < 10 then | ||
invalid_arg (Printf.sprintf "NUMA distance must be >= 10: %d" d) | ||
) | ||
row | ||
(fun i node -> | ||
let self = distances.(node).(node) in | ||
if self <> unreachable_distance then | ||
node_cpus.(node) <- CPUSet.add i node_cpus.(node) | ||
) | ||
distances ; | ||
let all = Array.fold_left CPUSet.union CPUSet.empty node_cpus in | ||
let candidates = gen_candidates distances in | ||
{ | ||
cpu_to_node ; | ||
|
||
let* () = | ||
if Array.for_all (fun cpus -> CPUSet.is_empty cpus) node_cpus then ( | ||
D.info | ||
"Not enabling NUMA: the ACPI SLIT only contains unreachable nodes." ; | ||
None | ||
) else | ||
Some () | ||
in | ||
|
||
let numa_matrix_is_reasonable = | ||
distances | ||
; cpu_to_node= Array.map node_of_int cpu_to_node | ||
; node_cpus | ||
; all | ||
; node_usage= Array.map (fun _ -> 0) distances | ||
; candidates | ||
} | ||
|> Array.to_seqi | ||
|> Seq.for_all (fun (i, row) -> | ||
let d = distances.(i).(i) in | ||
(d = unreachable_distance || d = self_distance) | ||
&& Array.for_all | ||
(fun d -> d >= self_distance || d = unreachable_distance) | ||
row | ||
) | ||
in | ||
|
||
let* () = | ||
if not numa_matrix_is_reasonable then ( | ||
D.info | ||
"Not enabling NUMA: the ACPI SLIT table contains values that are \ | ||
invalid." ; | ||
None | ||
) else | ||
Some () | ||
in | ||
|
||
let candidates = gen_candidates distances in | ||
|
||
let all = Array.fold_left CPUSet.union CPUSet.empty node_cpus in | ||
Some | ||
{ | ||
distances | ||
; cpu_to_node= Array.map node_of_int cpu_to_node | ||
; node_cpus | ||
; all | ||
; node_usage= Array.map (fun _ -> 0) distances | ||
; candidates | ||
} | ||
|
||
let distance t (Node a) (Node b) = t.distances.(a).(b) | ||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.