Skip to content

CP-53658: Claim memory on a single NUMA node #6369

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Apr 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 66 additions & 30 deletions ocaml/xapi-client/event_helper.ml
Original file line number Diff line number Diff line change
Expand Up @@ -43,96 +43,132 @@ type event_record =
| VMPP of [`VMPP] Ref.t * API.vMPP_t option
| VMSS of [`VMSS] Ref.t * API.vMSS_t option

let maybe f x = match x with Some x -> Some (f x) | None -> None

let record_of_event ev =
let rpc = ev.Event_types.snapshot in
match ev.Event_types.ty with
| "session" ->
Session
( Ref.of_secret_string ev.Event_types.reference
, maybe API.session_t_of_rpc rpc
, Option.map API.session_t_of_rpc rpc
)
| "task" ->
Task (Ref.of_string ev.Event_types.reference, maybe API.task_t_of_rpc rpc)
Task
( Ref.of_string ev.Event_types.reference
, Option.map API.task_t_of_rpc rpc
)
| "event" ->
Event
(Ref.of_string ev.Event_types.reference, maybe API.event_t_of_rpc rpc)
( Ref.of_string ev.Event_types.reference
, Option.map API.event_t_of_rpc rpc
)
| "vm" ->
VM (Ref.of_string ev.Event_types.reference, maybe API.vM_t_of_rpc rpc)
VM (Ref.of_string ev.Event_types.reference, Option.map API.vM_t_of_rpc rpc)
| "vm_metrics" ->
VM_metrics
( Ref.of_string ev.Event_types.reference
, maybe API.vM_metrics_t_of_rpc rpc
, Option.map API.vM_metrics_t_of_rpc rpc
)
| "vm_guest_metrics" ->
VM_guest_metrics
( Ref.of_string ev.Event_types.reference
, maybe API.vM_guest_metrics_t_of_rpc rpc
, Option.map API.vM_guest_metrics_t_of_rpc rpc
)
| "host" ->
Host (Ref.of_string ev.Event_types.reference, maybe API.host_t_of_rpc rpc)
Host
( Ref.of_string ev.Event_types.reference
, Option.map API.host_t_of_rpc rpc
)
| "host_metrics" ->
Host_metrics
( Ref.of_string ev.Event_types.reference
, maybe API.host_metrics_t_of_rpc rpc
, Option.map API.host_metrics_t_of_rpc rpc
)
| "host_cpu" ->
Host_cpu
(Ref.of_string ev.Event_types.reference, maybe API.host_cpu_t_of_rpc rpc)
( Ref.of_string ev.Event_types.reference
, Option.map API.host_cpu_t_of_rpc rpc
)
| "network" ->
Network
(Ref.of_string ev.Event_types.reference, maybe API.network_t_of_rpc rpc)
( Ref.of_string ev.Event_types.reference
, Option.map API.network_t_of_rpc rpc
)
| "vif" ->
VIF (Ref.of_string ev.Event_types.reference, maybe API.vIF_t_of_rpc rpc)
VIF
(Ref.of_string ev.Event_types.reference, Option.map API.vIF_t_of_rpc rpc)
| "vif_metrics" ->
VIF_metrics
( Ref.of_string ev.Event_types.reference
, maybe API.vIF_metrics_t_of_rpc rpc
, Option.map API.vIF_metrics_t_of_rpc rpc
)
| "pif" ->
PIF (Ref.of_string ev.Event_types.reference, maybe API.pIF_t_of_rpc rpc)
PIF
(Ref.of_string ev.Event_types.reference, Option.map API.pIF_t_of_rpc rpc)
| "pif_metrics" ->
PIF_metrics
( Ref.of_string ev.Event_types.reference
, maybe API.pIF_metrics_t_of_rpc rpc
, Option.map API.pIF_metrics_t_of_rpc rpc
)
| "sr" ->
SR (Ref.of_string ev.Event_types.reference, maybe API.sR_t_of_rpc rpc)
SR (Ref.of_string ev.Event_types.reference, Option.map API.sR_t_of_rpc rpc)
| "vdi" ->
VDI (Ref.of_string ev.Event_types.reference, maybe API.vDI_t_of_rpc rpc)
VDI
(Ref.of_string ev.Event_types.reference, Option.map API.vDI_t_of_rpc rpc)
| "vbd" ->
VBD (Ref.of_string ev.Event_types.reference, maybe API.vBD_t_of_rpc rpc)
VBD
(Ref.of_string ev.Event_types.reference, Option.map API.vBD_t_of_rpc rpc)
| "vbd_metrics" ->
VBD_metrics
( Ref.of_string ev.Event_types.reference
, maybe API.vBD_metrics_t_of_rpc rpc
, Option.map API.vBD_metrics_t_of_rpc rpc
)
| "pbd" ->
PBD (Ref.of_string ev.Event_types.reference, maybe API.pBD_t_of_rpc rpc)
PBD
(Ref.of_string ev.Event_types.reference, Option.map API.pBD_t_of_rpc rpc)
| "crashdump" ->
Crashdump
( Ref.of_string ev.Event_types.reference
, maybe API.crashdump_t_of_rpc rpc
, Option.map API.crashdump_t_of_rpc rpc
)
| "vtpm" ->
VTPM (Ref.of_string ev.Event_types.reference, maybe API.vTPM_t_of_rpc rpc)
VTPM
( Ref.of_string ev.Event_types.reference
, Option.map API.vTPM_t_of_rpc rpc
)
| "console" ->
Console
(Ref.of_string ev.Event_types.reference, maybe API.console_t_of_rpc rpc)
( Ref.of_string ev.Event_types.reference
, Option.map API.console_t_of_rpc rpc
)
| "user" ->
User (Ref.of_string ev.Event_types.reference, maybe API.user_t_of_rpc rpc)
User
( Ref.of_string ev.Event_types.reference
, Option.map API.user_t_of_rpc rpc
)
| "pool" ->
Pool (Ref.of_string ev.Event_types.reference, maybe API.pool_t_of_rpc rpc)
Pool
( Ref.of_string ev.Event_types.reference
, Option.map API.pool_t_of_rpc rpc
)
| "message" ->
Message
(Ref.of_string ev.Event_types.reference, maybe API.message_t_of_rpc rpc)
( Ref.of_string ev.Event_types.reference
, Option.map API.message_t_of_rpc rpc
)
| "secret" ->
Secret
(Ref.of_string ev.Event_types.reference, maybe API.secret_t_of_rpc rpc)
( Ref.of_string ev.Event_types.reference
, Option.map API.secret_t_of_rpc rpc
)
| "vmpp" ->
VMPP (Ref.of_string ev.Event_types.reference, maybe API.vMPP_t_of_rpc rpc)
VMPP
( Ref.of_string ev.Event_types.reference
, Option.map API.vMPP_t_of_rpc rpc
)
| "vmss" ->
VMSS (Ref.of_string ev.Event_types.reference, maybe API.vMSS_t_of_rpc rpc)
VMSS
( Ref.of_string ev.Event_types.reference
, Option.map API.vMSS_t_of_rpc rpc
)
| _ ->
failwith "unknown event type"
7 changes: 4 additions & 3 deletions ocaml/xenopsd/c_stubs/xenctrlext_stubs.c
Original file line number Diff line number Diff line change
Expand Up @@ -672,16 +672,17 @@ CAMLprim value stub_xenforeignmemory_unmap(value fmem, value mapping)
}

CAMLprim value stub_xenctrlext_domain_claim_pages(value xch_val, value domid_val,
value nr_pages_val)
value numa_node_val, value nr_pages_val)
{
CAMLparam3(xch_val, domid_val, nr_pages_val);
CAMLparam4(xch_val, domid_val, numa_node_val, nr_pages_val);
int retval, the_errno;
xc_interface* xch = xch_of_val(xch_val);
uint32_t domid = Int_val(domid_val);
// unsigned int numa_node = Int_val(numa_node_val);
unsigned long nr_pages = Long_val(nr_pages_val);

caml_release_runtime_system();
retval = xc_domain_claim_pages(xch, domid, nr_pages);
retval = xc_domain_claim_pages(xch, domid, /*numa_node,*/ nr_pages);
the_errno = errno;
caml_acquire_runtime_system();

Expand Down
11 changes: 5 additions & 6 deletions ocaml/xenopsd/lib/softaffinity.ml
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,9 @@ let plan host nodes ~vm =
(Fmt.to_to_string NUMARequest.pp_dump requested)
(Fmt.to_to_string NUMAResource.pp_dump allocated) ;
let candidate = nodes.(nodeidx) in
( NUMAResource.union allocated candidate
, node :: picked
, NUMARequest.shrink requested candidate
)
(* This is where the memory allocated to the node can be calculated *)
let remaining_request = NUMARequest.shrink requested candidate in
(NUMAResource.union allocated candidate, node :: picked, remaining_request)
in
let plan_valid (avg, nodes) =
let allocated, picked, remaining =
Expand Down Expand Up @@ -72,8 +71,8 @@ let plan host nodes ~vm =
| None ->
debug "No allocations possible" ;
None
| Some allocated ->
| Some (allocated, nodes) ->
debug "Allocated resources: %s"
(Fmt.to_to_string NUMAResource.pp_dump allocated) ;
assert (NUMARequest.fits vm allocated) ;
Some allocated.NUMAResource.affinity
Some (allocated.NUMAResource.affinity, nodes)
6 changes: 5 additions & 1 deletion ocaml/xenopsd/lib/softaffinity.mli
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@

open Topology

val plan : NUMA.t -> NUMAResource.t array -> vm:NUMARequest.t -> CPUSet.t option
val plan :
NUMA.t
-> NUMAResource.t array
-> vm:NUMARequest.t
-> (Topology.CPUSet.t * Topology.NUMA.node list) option
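(** [plan host nodes ~vm] returns the CPU soft affinity recommended for [vm],
together with the list of NUMA nodes that were picked for it, or [None] when
no allocation is possible. The plan is chosen
such that the memory latency between the NUMA nodes of the vCPUs is small,
and usage of NUMA nodes is balanced.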
Expand Down
2 changes: 1 addition & 1 deletion ocaml/xenopsd/lib/topology.ml
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ module NUMA = struct
None
else (
List.iter (fun (Node n) -> t.node_usage.(n) <- t.node_usage.(n) + 1) nodes ;
Some result
Some (result, nodes)
)

let pp_dump_node = Fmt.(using (fun (Node x) -> x) int)
Expand Down
5 changes: 4 additions & 1 deletion ocaml/xenopsd/lib/topology.mli
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,10 @@ module NUMA : sig
NUMA nodes > 16 it limits the length of the sequence to [n+65520], to
avoid exponential blowup. *)

val choose : t -> (node list * NUMAResource.t) Seq.t -> NUMAResource.t option
val choose :
t
-> (node list * NUMAResource.t) Seq.t
-> (NUMAResource.t * node list) option
(** [choose t resources] will choose one NUMA node deterministically, trying
to keep the overall NUMA node usage balanced. When a choice is made, the
chosen resource is returned together with its associated node list. *)

Expand Down
14 changes: 8 additions & 6 deletions ocaml/xenopsd/test/test_topology.ml
Original file line number Diff line number Diff line change
Expand Up @@ -210,26 +210,28 @@ let test_allocate ?(mem = default_mem) (expected_cores, h) ~vms () =
match Softaffinity.plan h nodes ~vm with
| None ->
Alcotest.fail "No NUMA plan"
| Some plan ->
D.debug "NUMA allocation succeeded for VM %d: %s" i
(Fmt.to_to_string CPUSet.pp_dump plan) ;
| Some (cpu_plan, mem_plan) ->
D.debug
"NUMA allocation succeeded for VM %d: [CPUS: %s]; [nodes: %s]" i
(Fmt.to_to_string CPUSet.pp_dump cpu_plan)
(Fmt.to_to_string Fmt.(Dump.list NUMA.pp_dump_node) mem_plan) ;
let usednodes =
plan
cpu_plan
|> CPUSet.elements
|> List.map (NUMA.node_of_cpu h)
|> List.sort_uniq compare
|> List.to_seq
in
let costs_numa_aware =
vm_access_costs h plans (vm_cores, usednodes, plan)
vm_access_costs h plans (vm_cores, usednodes, cpu_plan)
in
let costs_default =
vm_access_costs h plans (vm_cores, NUMA.nodes h, NUMA.all_cpus h)
in
cost_not_worse ~default:costs_default costs_numa_aware ;
( costs_default :: costs_old
, costs_numa_aware :: costs_new
, ((vm_cores, List.of_seq usednodes), plan) :: plans
, ((vm_cores, List.of_seq usednodes), cpu_plan) :: plans
)
)
([], [], [])
Expand Down
Loading
Loading