14
14
15
15
module D = Debug. Make (struct let name = " topology" end )
16
16
17
- open D
18
-
19
17
module CPUSet = struct
20
- include Set. Make (struct
21
- type t = int
22
-
23
- let compare (x : int ) (y : int ) = compare x y
24
- end )
18
+ include Set. Make (Int )
25
19
26
20
let pp_dump = Fmt. using to_seq Fmt. (Dump. seq int )
27
21
@@ -95,27 +89,32 @@ let seq_range a b =
95
89
let rec next i () = if i = b then Seq. Nil else Seq. Cons (i, next (i + 1 )) in
96
90
next a
97
91
98
- (* * [gen_2n n] Generates all non-empty subsets of the set of [n] nodes. *)
99
- let seq_gen_2n n =
92
+ let seq_filteri p s =
93
+ let rec loop i s () =
94
+ match s () with
95
+ | Seq. Nil ->
96
+ Seq. Nil
97
+ | Cons (hd , s ) ->
98
+ if p i hd then
99
+ Cons (hd, loop (i + 1 ) s)
100
+ else
101
+ loop (i + 1 ) s ()
102
+ in
103
+ loop 0 s
104
+
105
+ (* * [seq_all_subsets n] Generates all non-empty subsets of the [nodes] set. *)
106
+ let seq_all_subsets nodes =
107
+ let n = Seq. length nodes in
100
108
(* A node can either be present in the output or not, so use a loop [1, 2^n)
101
109
and have the [i]th bit determine that. *)
102
- let of_mask i =
103
- seq_range 0 n |> Seq. filter (fun bit -> (i lsr bit) land 1 = 1 )
104
- in
110
+ let of_mask i = nodes |> seq_filteri (fun bit _ -> (i lsr bit) land 1 = 1 ) in
105
111
seq_range 1 (1 lsl n) |> Seq. map of_mask
106
112
107
113
(* * [seq_sort ~cmp s] sorts [s] in a temporary place using [cmp]. *)
108
114
let seq_sort ~cmp s =
109
115
let a = Array. of_seq s in
110
116
Array. fast_sort cmp a ; Array. to_seq a
111
117
112
- (* * [seq_append a b] is the sequence [a] followed by [b] *)
113
- let seq_append (a : 'a Seq.t ) (b : 'a Seq.t ) =
114
- let rec next v () =
115
- match v () with Seq. Nil -> b () | Seq. Cons (x , xs ) -> Seq. Cons (x, next xs)
116
- in
117
- next a
118
-
119
118
module NUMA = struct
120
119
type node = Node of int
121
120
@@ -125,31 +124,43 @@ module NUMA = struct
125
124
let compare (Node a ) (Node b ) = compare a b
126
125
end )
127
126
127
+ (* -1 in 32 bits *)
128
+ let unreachable_distance = 0xFFFFFFFF
129
+
130
+ let self_distance = 10
131
+
128
132
(* no mutation is exposed in the interface, therefore this is immutable *)
129
133
type t = {
130
134
distances : int array array
131
135
; cpu_to_node : node array
132
136
; node_cpus : CPUSet .t array
133
137
; all : CPUSet .t
134
138
; node_usage : int array
139
+ (* * Usage across nodes is meant to be balanced when choosing candidates for a VM *)
135
140
; candidates : (float * node Seq .t ) Seq .t
141
+ (* * Sequence of all subsets of nodes and the average distance within
142
+ the subset, sorted by the latter in increasing order. *)
136
143
}
137
144
138
145
let node_of_int i = Node i
139
146
140
147
let node_distances d nodes =
141
- let dists =
142
- nodes |> Seq. flat_map (fun n1 -> nodes |> Seq. map (fun n2 -> d.(n1).(n2)))
143
- in
144
- let count, max_dist, sum_dist =
145
- Seq. fold_left
146
- (fun (count , maxv , sum ) e -> (count + 1 , max maxv e, sum + e))
147
- (0 , min_int, 0 ) dists
148
- in
149
- (* We want to minimize maximum distance first, and average distance next.
150
- When running the VM we don't know which pCPU it'll end up using, and want
151
- to limit the worst case performance. *)
152
- ((max_dist, float sum_dist /. float count), nodes)
148
+ if Seq. is_empty nodes then
149
+ None
150
+ else
151
+ let dists =
152
+ nodes
153
+ |> Seq. flat_map (fun n1 -> nodes |> Seq. map (fun n2 -> d.(n1).(n2)))
154
+ in
155
+ let count, max_dist, sum_dist =
156
+ Seq. fold_left
157
+ (fun (count , maxv , sum ) e -> (count + 1 , max maxv e, sum + e))
158
+ (0 , min_int, 0 ) dists
159
+ in
160
+ (* We want to minimize maximum distance first, and average distance next.
161
+ When running the VM we don't know which pCPU it'll end up using, and want
162
+ to limit the worst case performance. *)
163
+ Some ((max_dist, float sum_dist /. float count), nodes)
153
164
154
165
let dist_cmp (a1 , _ ) (b1 , _ ) = compare a1 b1
155
166
@@ -159,60 +170,96 @@ module NUMA = struct
159
170
[n*multiply ... n*multiply + multiply-1], except we always the add the
160
171
single NUMA node combinations. *)
161
172
(* make sure that single NUMA nodes are always present in the combinations *)
162
- let single_nodes =
173
+ let distance_to_candidate d = (d, float d) in
174
+ let valid_nodes =
163
175
seq_range 0 (Array. length d)
164
- |> Seq. map (fun i -> ((10 , 10.0 ), Seq. return i))
176
+ |> Seq. filter_map (fun i ->
177
+ let self = d.(i).(i) in
178
+ if self <> unreachable_distance then
179
+ Some i
180
+ else
181
+ None
182
+ )
165
183
in
166
- let numa_nodes = Array . length d in
184
+ let numa_nodes = Seq . length valid_nodes in
167
185
let nodes =
168
- if numa_nodes > 16 then
169
- (* try just the single nodes, and give up (use all nodes otherwise) to
170
- avoid exponential running time. We could do better here, e.g. by
186
+ if numa_nodes > 16 then (
187
+ (* Avoid generating too many candidates because of the exponential
188
+ running time. We could do better here, e.g. by
171
189
reducing the matrix *)
172
- single_nodes
173
- else
174
- numa_nodes
175
- |> seq_gen_2n
176
- |> Seq. map (node_distances d)
177
- |> seq_append single_nodes
190
+ D. info
191
+ " %s: More than 16 valid NUMA nodes detected, considering only \
192
+ individual nodes."
193
+ __FUNCTION__ ;
194
+ valid_nodes
195
+ |> Seq. map (fun i ->
196
+ let self = d.(i).(i) in
197
+ (distance_to_candidate self, Seq. return i)
198
+ )
199
+ ) else
200
+ valid_nodes |> seq_all_subsets |> Seq. filter_map (node_distances d)
178
201
in
179
202
nodes
180
203
|> seq_sort ~cmp: dist_cmp
181
204
|> Seq. map (fun ((_ , avg ), nodes ) -> (avg, Seq. map (fun n -> Node n) nodes))
182
205
183
- let pp_dump_distances = Fmt. (int |> Dump. array |> Dump. array )
184
-
185
206
let make ~distances ~cpu_to_node =
186
- debug " Distances: %s" (Fmt. to_to_string pp_dump_distances distances) ;
187
- debug " CPU2Node: %s" (Fmt. to_to_string Fmt. (Dump. array int ) cpu_to_node) ;
207
+ let ( let * ) = Option. bind in
188
208
let node_cpus = Array. map (fun _ -> CPUSet. empty) distances in
209
+
210
+ (* nothing can be scheduled on unreachable nodes, remove them from the
211
+ node_cpus *)
189
212
Array. iteri
190
- (fun i node -> node_cpus.(node) < - CPUSet. add i node_cpus.(node))
191
- cpu_to_node ;
192
- Array. iteri
193
- (fun i row ->
194
- let d = distances.(i).(i) in
195
- if d <> 10 then
196
- invalid_arg
197
- (Printf. sprintf " NUMA distance from node to itself must be 10: %d" d) ;
198
- Array. iteri
199
- (fun _ d ->
200
- if d < 10 then
201
- invalid_arg (Printf. sprintf " NUMA distance must be >= 10: %d" d)
202
- )
203
- row
213
+ (fun i node ->
214
+ let self = distances.(node).(node) in
215
+ if self <> unreachable_distance then
216
+ node_cpus.(node) < - CPUSet. add i node_cpus.(node)
204
217
)
205
- distances ;
206
- let all = Array. fold_left CPUSet. union CPUSet. empty node_cpus in
207
- let candidates = gen_candidates distances in
208
- {
218
+ cpu_to_node ;
219
+
220
+ let * () =
221
+ if Array. for_all (fun cpus -> CPUSet. is_empty cpus) node_cpus then (
222
+ D. info
223
+ " Not enabling NUMA: the ACPI SLIT only contains unreachable nodes." ;
224
+ None
225
+ ) else
226
+ Some ()
227
+ in
228
+
229
+ let numa_matrix_is_reasonable =
209
230
distances
210
- ; cpu_to_node= Array. map node_of_int cpu_to_node
211
- ; node_cpus
212
- ; all
213
- ; node_usage= Array. map (fun _ -> 0 ) distances
214
- ; candidates
215
- }
231
+ |> Array. to_seqi
232
+ |> Seq. for_all (fun (i , row ) ->
233
+ let d = distances.(i).(i) in
234
+ (d = unreachable_distance || d = self_distance)
235
+ && Array. for_all
236
+ (fun d -> d > = self_distance || d = unreachable_distance)
237
+ row
238
+ )
239
+ in
240
+
241
+ let * () =
242
+ if not numa_matrix_is_reasonable then (
243
+ D. info
244
+ " Not enabling NUMA: the ACPI SLIT table contains values that are \
245
+ invalid." ;
246
+ None
247
+ ) else
248
+ Some ()
249
+ in
250
+
251
+ let candidates = gen_candidates distances in
252
+
253
+ let all = Array. fold_left CPUSet. union CPUSet. empty node_cpus in
254
+ Some
255
+ {
256
+ distances
257
+ ; cpu_to_node= Array. map node_of_int cpu_to_node
258
+ ; node_cpus
259
+ ; all
260
+ ; node_usage= Array. map (fun _ -> 0 ) distances
261
+ ; candidates
262
+ }
216
263
217
264
let distance t (Node a ) (Node b ) = t.distances.(a).(b)
218
265
0 commit comments