From 4f001f2dd25aacdb63f2b0311f395b9d76dcac9f Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Thu, 16 May 2024 16:49:39 +0800 Subject: [PATCH 1/9] CP-48625: Code refactoring Add func host_to_vm_count_map to be used, rename RefMap to HostMap Signed-off-by: Gang Ji --- ocaml/xapi/xapi_vm_helpers.ml | 83 ++++++++++++++++++++++++++++------- 1 file changed, 67 insertions(+), 16 deletions(-) diff --git a/ocaml/xapi/xapi_vm_helpers.ml b/ocaml/xapi/xapi_vm_helpers.ml index 2e07c7670cc..4de454970a7 100644 --- a/ocaml/xapi/xapi_vm_helpers.ml +++ b/ocaml/xapi/xapi_vm_helpers.ml @@ -35,7 +35,7 @@ module SRSet = Set.Make (struct let compare = Stdlib.compare end) -module RefMap = Map.Make (struct +module HostMap = Map.Make (struct type t = [`host] Ref.t let compare = Ref.compare @@ -997,6 +997,46 @@ let rank_hosts_by_best_vgpu ~__context vgpu visible_hosts = hosts |> List.map (fun g -> List.map fst g) +let host_to_vm_count_map ~__context group = + let host_of_vm vm = + let vm_rec = Db.VM.get_record ~__context ~self:vm in + (* 1. When a VM starts migrating, it's 'scheduled_to_be_resident_on' will be set, + while its 'resident_on' is not cleared. In this case, + 'scheduled_to_be_resident_on' should be treated as its running host. + 2. For paused VM, its 'resident_on' has value, but it will not be considered + while computing the amount of VMs. *) + match + ( vm_rec.API.vM_scheduled_to_be_resident_on + , vm_rec.API.vM_resident_on + , vm_rec.API.vM_power_state + ) + with + | sh, _, _ when sh <> Ref.null -> + Some sh + | _, h, `Running when h <> Ref.null -> + Some h + | _ -> + None + in + Db.VM_group.get_VMs ~__context ~self:group + |> List.fold_left + (fun m vm -> + match host_of_vm vm with + | Some h -> + HostMap.update h + (fun c -> Option.(value ~default:0 c |> succ |> some)) + m + | None -> + m + ) + HostMap.empty + +let rank_hosts_by_vm_cnt_in_group ~__context group hosts = + let host_map = host_to_vm_count_map ~__context group in + Helpers.group_by ~ordering:`ascending + (fun h -> HostMap.find_opt h host_map |> Option.value ~default:0) + hosts + (* Group all hosts to 2 parts: 1. A list of affinity host (only one host). 2. A list of lists, each list contains hosts with the same number of @@ -1065,6 +1105,27 @@ let rank_hosts_by_placement ~__context ~vm ~group = in affinity_host :: sorted_hosts |> List.filter (( <> ) []) +let rec select_host_from_ranked_lists ~vm ~host_selector ~ranked_host_lists = + match ranked_host_lists with + | [] -> + raise (Api_errors.Server_error (Api_errors.no_hosts_available, [])) + | hosts :: less_optimal_groups_of_hosts -> ( + debug + "Attempting to select host for VM (%s) in a group of equally optimal \ + hosts [ %s ]" + (Ref.string_of vm) + (String.concat ";" (List.map Ref.string_of hosts)) ; + try host_selector hosts + with _ -> + info + "Failed to select host for VM (%s) in any of [ %s ], continue to \ + select from less optimal hosts" + (Ref.string_of vm) + (String.concat ";" (List.map Ref.string_of hosts)) ; + select_host_from_ranked_lists ~vm ~host_selector + ~ranked_host_lists:less_optimal_groups_of_hosts + ) + (* Selects a single host from the set of all hosts on which the given [vm] can boot. Raises [Api_errors.no_hosts_available] if no such host exists. 
1.Take anti-affinity, or VGPU, or Network SR-IOV as a group_key for group all hosts into host list list @@ -1109,22 +1170,12 @@ let choose_host_for_vm_no_wlb ~__context ~vm ~snapshot = ) ) in - let rec select_host_from = function - | [] -> - raise (Api_errors.Server_error (Api_errors.no_hosts_available, [])) - | hosts :: less_optimal_groups_of_hosts -> ( - debug - "Attempting to start VM (%s) on one of equally optimal hosts [ %s ]" - (Ref.string_of vm) - (String.concat ";" (List.map Ref.string_of hosts)) ; - try Xapi_vm_placement.select_host __context vm validate_host hosts - with _ -> - info "Failed to start VM (%s) on any of [ %s ]" (Ref.string_of vm) - (String.concat ";" (List.map Ref.string_of hosts)) ; - select_host_from less_optimal_groups_of_hosts - ) + let host_selector = + Xapi_vm_placement.select_host __context vm validate_host in - try select_host_from host_lists + try + select_host_from_ranked_lists ~vm ~host_selector + ~ranked_host_lists:host_lists with | Api_errors.Server_error (x, []) when x = Api_errors.no_hosts_available -> debug From 1234dc71af29ccef50dfd571ea0d291bb80f6c1b Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Thu, 16 May 2024 16:59:42 +0800 Subject: [PATCH 2/9] CP-48625: Code refactoring fixup review comments Signed-off-by: Gang Ji --- ocaml/xapi/xapi_vm_helpers.ml | 78 +++++++++++------------------------ 1 file changed, 24 insertions(+), 54 deletions(-) diff --git a/ocaml/xapi/xapi_vm_helpers.ml b/ocaml/xapi/xapi_vm_helpers.ml index 4de454970a7..c25a887b54b 100644 --- a/ocaml/xapi/xapi_vm_helpers.ml +++ b/ocaml/xapi/xapi_vm_helpers.ml @@ -1037,6 +1037,13 @@ let rank_hosts_by_vm_cnt_in_group ~__context group hosts = (fun h -> HostMap.find_opt h host_map |> Option.value ~default:0) hosts +let get_affinity_host ~__context ~vm = + match Db.VM.get_affinity ~__context ~self:vm with + | ref when Db.is_valid_ref __context ref -> + Some ref + | _ -> + None + (* Group all hosts to 2 parts: 1. A list of affinity host (only one host). 2. A list of lists, each list contains hosts with the same number of @@ -1051,77 +1058,40 @@ let rank_hosts_by_vm_cnt_in_group ~__context group hosts = ] *) let rank_hosts_by_placement ~__context ~vm ~group = - let affinity_host = - match Db.VM.get_affinity ~__context ~self:vm with - | ref when Db.is_valid_ref __context ref -> - [ref] - | _ -> - [] + let hosts = Db.Host.get_all ~__context in + let affinity_host = get_affinity_host ~__context ~vm in + let hosts_without_affinity = + Option.fold ~none:hosts + ~some:(fun host -> List.filter (( <> ) host) hosts) + affinity_host in let sorted_hosts = - let host_without_affinity_host = - Db.Host.get_all ~__context - |> List.filter (fun host -> not (List.mem host affinity_host)) - in - let host_of_vm vm = - let vm_rec = Db.VM.get_record ~__context ~self:vm in - (* 1. When a VM starts migrating, it's 'scheduled_to_be_resident_on' will be set, - while its 'resident_on' is not cleared. In this case, - 'scheduled_to_be_resident_on' should be treated as its running host. - 2. For paused VM, its 'resident_on' has value, but it will not be considered - while computing the amount of VMs. 
*) - match - ( vm_rec.API.vM_scheduled_to_be_resident_on - , vm_rec.API.vM_resident_on - , vm_rec.API.vM_power_state - ) - with - | sh, _, _ when sh <> Ref.null -> - Some sh - | _, h, `Running when h <> Ref.null -> - Some h - | _ -> - None - in - let host_map = - Db.VM_group.get_VMs ~__context ~self:group - |> List.fold_left - (fun m vm -> - match host_of_vm vm with - | Some h -> - RefMap.update h - (fun c -> Option.(value ~default:0 c |> succ |> some)) - m - | None -> - m - ) - RefMap.empty - in - host_without_affinity_host - |> Helpers.group_by ~ordering:`ascending (fun h -> - RefMap.find_opt h host_map |> Option.value ~default:0 - ) - |> List.map (fun g -> List.map fst g) + hosts_without_affinity + |> rank_hosts_by_vm_cnt_in_group ~__context group + |> List.(map (map fst)) in - affinity_host :: sorted_hosts |> List.filter (( <> ) []) + match affinity_host with + | Some host -> + [host] :: sorted_hosts + | None -> + sorted_hosts let rec select_host_from_ranked_lists ~vm ~host_selector ~ranked_host_lists = match ranked_host_lists with | [] -> raise (Api_errors.Server_error (Api_errors.no_hosts_available, [])) | hosts :: less_optimal_groups_of_hosts -> ( + let hosts_str = String.concat ";" (List.map Ref.string_of hosts) in debug "Attempting to select host for VM (%s) in a group of equally optimal \ hosts [ %s ]" - (Ref.string_of vm) - (String.concat ";" (List.map Ref.string_of hosts)) ; + (Ref.string_of vm) hosts_str ; try host_selector hosts with _ -> info "Failed to select host for VM (%s) in any of [ %s ], continue to \ select from less optimal hosts" - (Ref.string_of vm) - (String.concat ";" (List.map Ref.string_of hosts)) ; + (Ref.string_of vm) hosts_str ; select_host_from_ranked_lists ~vm ~host_selector ~ranked_host_lists:less_optimal_groups_of_hosts ) From 2b742bf9464561e45b5371dbaba10bbdab11edf8 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Thu, 23 May 2024 15:56:05 +0800 Subject: [PATCH 3/9] opam: add psq to xapi dependencies Signed-off-by: Gang Ji --- ocaml/xapi/dune | 1 + xapi.opam | 1 + xapi.opam.template | 1 + 3 files changed, 3 insertions(+) diff --git a/ocaml/xapi/dune b/ocaml/xapi/dune index fe161e0dd5f..b6ec0adf084 100644 --- a/ocaml/xapi/dune +++ b/ocaml/xapi/dune @@ -83,6 +83,7 @@ pam pciutil pci + psq ptime rpclib.core rpclib.json diff --git a/xapi.opam b/xapi.opam index e414d694b2c..387ba542fe6 100644 --- a/xapi.opam +++ b/xapi.opam @@ -38,6 +38,7 @@ depends: [ "ppx_deriving_rpc" "ppx_sexp_conv" "ppx_deriving" + "psq" "rpclib" "rrdd-plugin" "rresult" diff --git a/xapi.opam.template b/xapi.opam.template index dc48554787e..49f3902f66a 100644 --- a/xapi.opam.template +++ b/xapi.opam.template @@ -36,6 +36,7 @@ depends: [ "ppx_deriving_rpc" "ppx_sexp_conv" "ppx_deriving" + "psq" "rpclib" "rrdd-plugin" "rresult" From 4452e5e77cc3d7ecb3e76e1dfb56590c24bcdb05 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Thu, 16 May 2024 16:37:06 +0800 Subject: [PATCH 4/9] CP-48625: Anti-affinity support for host evacuation Host evacuation plan with anti-affinity support will be carried out in 3 steps: 1. Try to get a "spread evenly" plan for anti-affinity VMs, done if all the rest VMs got planned using binpack, otherwise continue. 2. Try to get a "no breach" plan for anti-affinity VMs, done if all the rest VMs got planned using binpack, otherwise continue. 3. Carry out a binpack plan ignoring VM anti-affinity. 
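The fallback ordering can be sketched as follows (illustration only; `spread_evenly_plan`, `no_breach_plan` and `binpack_plan` stand in for the planners added in this patch, each assumed to return `Some plan` only when every remaining VM could be placed):

    (* Sketch: try each planner in turn, falling back to plain binpacking. *)
    let plan_evacuation ~spread_evenly_plan ~no_breach_plan ~binpack_plan =
      match spread_evenly_plan () with
      | Some plan ->
          plan (* step 1: anti-affinity VMs spread evenly *)
      | None -> (
        match no_breach_plan () with
        | Some plan ->
            plan (* step 2: anti-affinity groups not breached *)
        | None ->
            binpack_plan () (* step 3: ignore anti-affinity *)
      )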
Signed-off-by: Gang Ji --- ocaml/xapi/xapi_ha_vm_failover.ml | 485 +++++++++++++++++++++++++++--- 1 file changed, 442 insertions(+), 43 deletions(-) diff --git a/ocaml/xapi/xapi_ha_vm_failover.ml b/ocaml/xapi/xapi_ha_vm_failover.ml index 4fbf46860f2..9d8ceb0865d 100644 --- a/ocaml/xapi/xapi_ha_vm_failover.ml +++ b/ocaml/xapi/xapi_ha_vm_failover.ml @@ -175,9 +175,6 @@ let order_f (_, vm_rec) = let ( $ ) x y = x y -(*****************************************************************************************************) -(* Planning code follows *) - (* Compute the total memory required of a VM (Running or not) *) let total_memory_of_vm ~__context policy snapshot = let main, shadow = @@ -185,50 +182,440 @@ let total_memory_of_vm ~__context policy snapshot = in Int64.add main shadow -(** Return a VM -> Host plan for the Host.evacuate code. We assume the VMs are all agile. The returned plan may - be incomplete if there was not enough memory. *) -let compute_evacuation_plan ~__context total_hosts remaining_hosts - vms_and_snapshots = - let hosts = - List.map - (fun host -> - ( host - , Memory_check.host_compute_free_memory_with_maximum_compression - ~__context ~host None - ) - ) - remaining_hosts +let host_free_memory ~__context ~host = + Memory_check.host_compute_free_memory_with_maximum_compression ~__context + ~host None + +let vm_memory ~__context snapshot = + let policy = + match Helpers.check_domain_type snapshot.API.vM_domain_type with + | `hvm | `pv -> + Memory_check.Dynamic_min + | `pv_in_pvh | `pvh -> + Memory_check.Static_max in - let vms = - List.map - (fun (vm, snapshot) -> - let policy = - match Helpers.check_domain_type snapshot.API.vM_domain_type with - | `hvm | `pv -> - Memory_check.Dynamic_min - | `pv_in_pvh | `pvh -> - Memory_check.Static_max - in - (vm, total_memory_of_vm ~__context policy snapshot) - ) - vms_and_snapshots + total_memory_of_vm ~__context policy snapshot + +module VMGroupMap = Map.Make (struct + type t = [`VM_group] Ref.t + + let compare = Ref.compare +end) + +module HostKey = struct + type t = [`host] Ref.t + + let compare = Ref.compare +end + +(* For some VM anti-affinity group, the state of a host which determines + evacuation planning for anti-affinity VMs in that group: + 1. vm_cnt: the number of running VMs in that group resident on the host + 2. h_size: the amount of free memory of the host *) +module AntiAffinityEvacuationPlanHostState = struct + type t = int * int64 + + (* During evacuation planning for anti-affintiy VMs, "vm_cnt" is the first + factor considered, "h_size" is the second factor considered: + Let's say the next anti-affinity VM to be planned belongs to group A, the + host to be selected should be the one which has minimal "vm_cnt" of group + A, for hosts with the same "vm_cnt", pick the one with the minimal + "h_size" which can hold the VM(h_size >= vm_size). *) + let compare (vm_cnt_0, h_size_0) (vm_cnt_1, h_size_1) = + match Int.compare vm_cnt_0 vm_cnt_1 with + | 0 -> + Int64.compare h_size_0 h_size_1 + | c -> + c +end + +(* A Psq of hosts for an anti-affinity group, which is used for evacuation + planning for anti-affinity VMs in that group: the minimal host in this Psq + is the first one to be considered to plan a VM from that group. + When several hosts share the minimal "AntiAffinityEvacuationPlanHostState", + the minimal host is the host with the smallest ref. 
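+   For example (illustrative values): with keys of the form (vm_cnt, h_size),
+   (1, 8 GiB) orders before (2, 1 GiB) because vm_cnt is compared first, and
+   (1, 1 GiB) orders before (1, 8 GiB) because the smaller free memory breaks
+   the tie.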
*) +module AntiAffinityEvacuationPlanHostQueue = + Psq.Make (HostKey) (AntiAffinityEvacuationPlanHostState) + +(* The pool state which determines the spread evenly evacuation planning for + anti-affinity VMs: a VMGroupMap which contains a Psq - + AntiAffinityEvacuationPlanHostQueue for each group, and each Psq contains + all the available hosts in the pool. + Let's say the next anti-affinity VM to be planned belongs to anti-affinity + group A. To get a spread evenly evacuation plan, the most suitable host to + plan the VM would be the host which has the minimal number of running VMs + from group A resident on it, for the hosts with the same number of running + VMs from group A, the one with the minimal free memory will be checked + first, which is just the minimal host returned from + "AntiAffinityEvacuationPlanHostQueue.min" on the Psq of group A. *) +type spread_evenly_plan_pool_state = + AntiAffinityEvacuationPlanHostQueue.t VMGroupMap.t + +(* The pool state which determines the no breach evacuation planning for anti- + affinity VMs: + grp_no_resident_hosts_queue: a VMGroupMap which contains a Psq - + AntiAffinityEvacuationPlanHostQueue for each group, and each Psq contains + "no_resident" hosts for that group. (A "no_resident" host for a group is a + host which has no running VMs from that group resident on it.) + grp_resident_hosts_count_map: a VMGroupMap which contains the number of + "resident" hosts for each group. (A "resident" host for a group is a host + which has at least one running VM from that group resident on it.) + Let's say the next anti-affinity VM to be planned belongs to anti-affinity + group A. If for group A, "resident_hosts_count" is already 2 or + greater than 2, we don't need to plan the VM on any host, if not, we will + need to check the host with the minimal free memory from the no resident + hosts queue, which is just the minimal host returned from + "AntiAffinityEvacuationPlanHostQueue.min" on the "no_resident" hosts queue + of group A. *) +type no_breach_plan_pool_state = { + grp_no_resident_hosts_queue: + AntiAffinityEvacuationPlanHostQueue.t VMGroupMap.t + ; grp_resident_hosts_count_map: int VMGroupMap.t +} + +let init_spread_evenly_plan_pool_state ~__context hosts = + Db.VM_group.get_all ~__context + |> List.filter (fun group -> + Db.VM_group.get_placement ~__context ~self:group = `anti_affinity + ) + |> List.fold_left + (fun grp_to_psq_map grp -> + let host_vm_cnt_map = + Xapi_vm_helpers.host_to_vm_count_map ~__context grp + in + VMGroupMap.add grp + (List.fold_left + (fun q (h, h_size) -> + let vm_cnt = + Xapi_vm_helpers.HostMap.find_opt h host_vm_cnt_map + |> Option.value ~default:0 + in + AntiAffinityEvacuationPlanHostQueue.add h (vm_cnt, h_size) q + ) + AntiAffinityEvacuationPlanHostQueue.empty hosts + ) + grp_to_psq_map + ) + VMGroupMap.empty + +(* Update "spread_evenly_plan_pool_state" after a VM from anti-affinity "group" with + memory size: "vm_size" is planned on the "host": + 1. For the "group", increase "vm_cnt" of the "host" by 1. + 2. For each group, updates the host's size by substracting "vm_size". 
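+   For example (illustrative values): if the "host" entry in the queue of
+   "group" was (1, 10 GiB) and a 2 GiB VM from "group" is planned there, it
+   becomes (2, 8 GiB), while the "host" entry in every other group's queue
+   only changes from (n, 10 GiB) to (n, 8 GiB).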
*) +let update_spread_evenly_plan_pool_state vm_size group host pool_state = + VMGroupMap.mapi + (fun grp hosts_q -> + match grp = group with + | true -> + AntiAffinityEvacuationPlanHostQueue.adjust host + (fun (vm_cnt, h_size) -> (vm_cnt + 1, Int64.sub h_size vm_size)) + hosts_q + | false -> + AntiAffinityEvacuationPlanHostQueue.adjust host + (fun (vm_cnt, h_size) -> (vm_cnt, Int64.sub h_size vm_size)) + hosts_q + ) + pool_state + +let init_no_breach_plan_pool_state spread_evenly_plan_pool_state = + let grp_no_resident_hosts_queue = + spread_evenly_plan_pool_state + |> VMGroupMap.map + (AntiAffinityEvacuationPlanHostQueue.filter + (fun _h (vm_cnt, _h_size) -> vm_cnt = 0 + ) + ) + in + let grp_resident_hosts_count_map = + spread_evenly_plan_pool_state + |> VMGroupMap.map (fun hosts_q -> + hosts_q + |> AntiAffinityEvacuationPlanHostQueue.filter + (fun _h (vm_cnt, _size) -> vm_cnt > 0 + ) + |> AntiAffinityEvacuationPlanHostQueue.size + ) in + {grp_no_resident_hosts_queue; grp_resident_hosts_count_map} + +let update_grp_no_resident_hosts_queue vm_size group host + grp_no_resident_hosts_queue = + VMGroupMap.mapi + (fun grp hosts_q -> + match grp = group with + | true -> + hosts_q |> AntiAffinityEvacuationPlanHostQueue.remove host + | false -> + hosts_q + |> AntiAffinityEvacuationPlanHostQueue.update host + (Option.map (fun (_vm_cnt, h_size) -> + (_vm_cnt, Int64.sub h_size vm_size) + ) + ) + ) + grp_no_resident_hosts_queue + +(* Update "no_breach_plan_pool_state" after a VM from anti-affinity "group" with + memory size: "vm_size" is planned on the "host": + 1. For the "group", the "host" is removed from its "no_resident" hosts queue, and increase its + "resident_hosts_count" by 1. + 2. For other groups, updates the host's size by substracting "vm_size" if the host is in that + group's "no_resident" hosts queue. *) +let update_no_breach_plan_pool_state vm_size group host pool_state = + { + grp_no_resident_hosts_queue= + pool_state.grp_no_resident_hosts_queue + |> update_grp_no_resident_hosts_queue vm_size group host + ; grp_resident_hosts_count_map= + pool_state.grp_resident_hosts_count_map + |> VMGroupMap.update group (Option.map succ) + } + +(*****************************************************************************************************) +(* Planning code follows *) + +let rec select_host_for_spread_evenly_plan vm vm_size hosts_queue = + match AntiAffinityEvacuationPlanHostQueue.min hosts_queue with + | None -> + None + | Some (host, (_vm_cnt, h_size)) -> ( + match vm_size <= h_size with + | true -> + Some host + | _ -> + hosts_queue + |> AntiAffinityEvacuationPlanHostQueue.remove host + |> select_host_for_spread_evenly_plan vm vm_size + ) + +let rec select_host_for_no_breach_plan vm vm_size hosts_queue = + match AntiAffinityEvacuationPlanHostQueue.min hosts_queue with + | Some (host, (0, h_size)) -> ( + match vm_size <= h_size with + | true -> + Some host + | _ -> + hosts_queue + |> AntiAffinityEvacuationPlanHostQueue.remove host + |> select_host_for_no_breach_plan vm vm_size + ) + | _ -> + None + +(* Try to get a spread evenly plan for anti-affinity VMs (for each anti-affinity group, the number of + running VMs from that group are spread evenly in all the rest hosts in the pool): + 1. For all the anti-affinity VMs which sorted in an increasing order in terms of the VM's memory + size, do host selection as below step 2. + 2. 
For each anti-affinity VM, select a host which can run it, and which has the minimal number of + VMs in the same anti-affinity group running on it, for the hosts with the same number of running + VMs in that group, pick the one with the minimal free memory. *) +let rec compute_spread_evenly_plan ~__context pool_state anti_affinity_vms + mapping = + info "compute_spread_evenly_plan" ; + match anti_affinity_vms with + | [] -> + mapping + | (vm, vm_size, group) :: remaining_vms -> ( + debug + "Spread evenly plan: try to plan for the anti-affinity VM (%s %s %s)." + (Ref.string_of vm) + (Db.VM.get_name_label ~__context ~self:vm) + (Db.VM_group.get_name_label ~__context ~self:group) ; + match + VMGroupMap.find group pool_state + |> select_host_for_spread_evenly_plan vm vm_size + with + | None -> + debug + "Spread evenly plan: no host can run this anti-affinity VM. Stop \ + the planning as there won't be a valid plan for this VM." ; + raise (Api_errors.Server_error (Api_errors.no_hosts_available, [])) + | Some h -> + debug + "Spread evenly plan: choose the host with the minimal free memory \ + which can run the vm: (%s %s)." + (Ref.string_of h) + (Db.Host.get_name_label ~__context ~self:h) ; + compute_spread_evenly_plan ~__context + (pool_state |> update_spread_evenly_plan_pool_state vm_size group h) + remaining_vms ((vm, h) :: mapping) + ) + +(* Try to get a no breach plan for anti-affinity VMs (for each anti-affinity group, there are at least + 2 hosts having running VMs in the group): + 1. For all the anti-affinity VMs which sorted in an increasing order in terms of the VM's memory + size, do host selection as below step 2. + 2. For each anti-affinity VM, try to select a host for it so that there are at least 2 hosts which + has running VMs in the same anti-affinity group. If there are already 2 hosts having running VMs + in that group, skip planning for the VM. *) +let rec compute_no_breach_plan ~__context total_hosts pool_state + anti_affinity_vms mapping = + info "compute_no_breach_plan" ; + match (total_hosts, anti_affinity_vms) with + | not_enough, _ when not_enough < 3 -> + debug + "There are less than 2 available hosts to migrate VMs to, \ + anti-affinity breach is inevitable" ; + raise (Api_errors.Server_error (Api_errors.no_hosts_available, [])) + | _, [] -> + mapping + | _, (vm, vm_size, group) :: remaining_vms -> + debug "No breach plan: try to plan for VM (%s %s %s)." (Ref.string_of vm) + (Db.VM.get_name_label ~__context ~self:vm) + (Db.VM_group.get_name_label ~__context ~self:group) ; + let no_resident_hosts = + VMGroupMap.find group pool_state.grp_no_resident_hosts_queue + in + + let resident_hosts_cnt = + VMGroupMap.find group pool_state.grp_resident_hosts_count_map + in + info "No breach plan: total_hosts: %d, resident_hosts_cnt: %d." + total_hosts resident_hosts_cnt ; + + if resident_hosts_cnt < 2 then ( + debug + "No breach plan: there are less than 2 hosts has running VM in the \ + same anti-affinity group, and there are still host(s) which has 0 \ + running VMs, try to plan for it." ; + match select_host_for_no_breach_plan vm vm_size no_resident_hosts with + | None -> + info + "No breach plan: failed to select host on any of the no resident \ + hosts, skip it, continue with the next VM." ; + compute_no_breach_plan ~__context total_hosts pool_state + remaining_vms mapping + | Some h -> + debug + "No breach plan: choose the no resident host with the minimal \ + free memory which can run the vm: (%s)." 
+ (Db.Host.get_name_label ~__context ~self:h) ; + compute_no_breach_plan ~__context total_hosts + (pool_state |> update_no_breach_plan_pool_state vm_size group h) + remaining_vms ((vm, h) :: mapping) + ) else ( + debug + "No breach plan: no need to plan for the VM as the number of hosts \ + which has running VMs from the same group is no less than 2, \ + continue to plan for the next one." ; + compute_no_breach_plan ~__context total_hosts pool_state remaining_vms + mapping + ) + +let anti_affinity_vms_increasing ~__context vms = + vms + |> List.filter_map (fun (vm, vm_size) -> + match Xapi_vm_helpers.vm_has_anti_affinity ~__context ~vm with + | Some (`AntiAffinity group) -> + Some (vm, vm_size, group) + | _ -> + None + ) + |> List.sort (fun (_, a, _) (_, b, _) -> compare a b) + +(* Return an evacuation plan respecting VM anti-affinity rules: it is done in 3 phases: + 1. Try to get a "spread evenly" plan for anti-affinity VMs, and then a binpack plan for + the rest of VMs. Done if every VM got planned, otherwise continue. + 2. Try to get a "no breach" plan for anti-affinity VMs, and then a binpack plan for the + rest of VMs. Done if every VM got planned, otherwise continue. + 3. Carry out a binpack plan ignoring VM anti-affinity. *) +let compute_anti_affinity_evacuation_plan ~__context total_hosts hosts vms = let config = {Binpack.hosts; vms; placement= []; total_hosts; num_failures= 1} in Binpack.check_configuration config ; - debug "Planning configuration for offline agile VMs = %s" - (Binpack.string_of_configuration - (fun x -> - Printf.sprintf "%s (%s)" (Ref.short_string_of x) - (Db.Host.get_hostname ~__context ~self:x) - ) - (fun x -> - Printf.sprintf "%s (%s)" (Ref.short_string_of x) - (Db.VM.get_name_label ~__context ~self:x) - ) - config - ) ; + + let anti_affinity_vms = vms |> anti_affinity_vms_increasing ~__context in + + let spread_evenly_plan_pool_state = + init_spread_evenly_plan_pool_state ~__context hosts + in + + let no_breach_plan_pool_state = + init_no_breach_plan_pool_state spread_evenly_plan_pool_state + in + + let binpack_plan ~__context config vms = + debug "Binpack planning configuration = %s" + (Binpack.string_of_configuration + (fun x -> + Printf.sprintf "%s (%s)" (Ref.short_string_of x) + (Db.Host.get_name_label ~__context ~self:x) + ) + (fun x -> + Printf.sprintf "%s (%s)" (Ref.short_string_of x) + (Db.VM.get_name_label ~__context ~self:x) + ) + config + ) ; + debug "VMs to attempt to evacuate: [ %s ]" + (String.concat "; " + (vms + |> List.map (fun (self, _) -> + Printf.sprintf "%s (%s)" (Ref.short_string_of self) + (Db.VM.get_name_label ~__context ~self) + ) + ) + ) ; + let h = Binpack.choose_heuristic config in + h.Binpack.get_specific_plan config (List.map fst vms) + in + + let try_to_binpack_with_plan_applied_first plan = + debug "Try to binpack for the rest VMs" ; + let config_after_plan_applied = Binpack.apply_plan config plan in + let vms_to_plan = + let planned_vm = List.map fst plan in + vms |> List.filter (fun (vm, _) -> not (List.mem vm planned_vm)) + in + let config_after_plan_applied = + {config_after_plan_applied with vms= vms_to_plan} + in + let b_plan = + binpack_plan ~__context config_after_plan_applied vms_to_plan + in + match List.length b_plan = List.length vms_to_plan with + | true -> + debug "Got final plan with plan applied first." ; + Some (plan @ b_plan) + | false -> + debug + "Failed to get final plan as failed to binpack for all the rest VMs." 
; + None + in + + let rec plan_in_steps ~__context plan_steps = + match plan_steps with + | [] -> + debug "Fallback to binpack plan..." ; + binpack_plan ~__context config vms + | plan_func :: plan_funcs -> ( + match plan_func ~__context |> try_to_binpack_with_plan_applied_first with + | exception Api_errors.Server_error (error_code, []) + when error_code = Api_errors.no_hosts_available -> + debug + "Anti-affinity VMs plan failed, continue to try next step \ + planning..." ; + plan_in_steps ~__context plan_funcs + | None -> + debug "Plan failed, continue to try next step planning..." ; + plan_in_steps ~__context plan_funcs + | Some plan -> + debug "Plan OK" ; plan + ) + in + plan_in_steps ~__context + [ + compute_spread_evenly_plan spread_evenly_plan_pool_state anti_affinity_vms + [] + ; compute_no_breach_plan total_hosts no_breach_plan_pool_state + anti_affinity_vms [] + ] + +(** Return a VM -> Host plan for the Host.evacuate code. We assume the VMs are all agile. The returned plan may + be incomplete if there was not enough memory. *) +let compute_evacuation_plan ~__context total_hosts remaining_hosts + vms_and_snapshots = debug "VMs to attempt to evacuate: [ %s ]" (String.concat "; " (List.map @@ -239,8 +626,20 @@ let compute_evacuation_plan ~__context total_hosts remaining_hosts vms_and_snapshots ) ) ; - let h = Binpack.choose_heuristic config in - h.Binpack.get_specific_plan config (List.map fst vms_and_snapshots) + let hosts = + List.map + (fun host -> (host, host_free_memory ~__context ~host)) + remaining_hosts + in + let vms = + List.map + (fun (vm, snapshot) -> (vm, vm_memory ~__context snapshot)) + vms_and_snapshots + in + try compute_anti_affinity_evacuation_plan ~__context total_hosts hosts vms + with Not_found -> + error "Data conrupted during compute_anti_affinity_evacuation_plan." ; + raise (Api_errors.Server_error (Api_errors.internal_error, [])) (** Passed to the planner to reason about other possible configurations, used to block operations which would destroy the HA VM restart plan. 
*) From 99f39dc00bd7be7b4592d22ede6918679f756c00 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Tue, 2 Apr 2024 10:51:21 +0800 Subject: [PATCH 5/9] CP-48752: Update UT - add "groups" for VM Signed-off-by: Gang Ji --- ocaml/tests/test_ha_vm_failover.ml | 39 ++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/ocaml/tests/test_ha_vm_failover.ml b/ocaml/tests/test_ha_vm_failover.ml index 1c4cf79e6ea..3b4a78ef0a4 100644 --- a/ocaml/tests/test_ha_vm_failover.ml +++ b/ocaml/tests/test_ha_vm_failover.ml @@ -27,6 +27,8 @@ type vbd = {agile: bool} type vif = {agile: bool} +type group = {name_label: string; placement: string} + type vm = { ha_always_run: bool ; ha_restart_priority: string @@ -34,6 +36,7 @@ type vm = { ; name_label: string ; vbds: vbd list ; vifs: vif list + ; groups: group list } let basic_vm = @@ -44,6 +47,7 @@ let basic_vm = ; name_label= "vm" ; vbds= [{agile= true}] ; vifs= [{agile= true}] + ; groups= [] } type host = {memory_total: int64; name_label: string; vms: vm list} @@ -55,8 +59,13 @@ type pool = { ; cluster: int } -let string_of_vm {memory; name_label; _} = - Printf.sprintf "{memory = %Ld; name_label = %S}" memory name_label +let string_of_group {name_label; placement} = + Printf.sprintf "{name_label = %S; placement = %S}" name_label placement + +let string_of_vm {memory; name_label; groups; _} = + Printf.sprintf "{memory = %Ld; name_label = %S; groups = [%s]}" memory + name_label + (Test_printers.list string_of_group groups) let string_of_host {memory_total; name_label; vms} = Printf.sprintf "{memory_total = %Ld; name_label = %S; vms = [%s]}" @@ -71,6 +80,26 @@ let string_of_pool {master; slaves; ha_host_failures_to_tolerate; cluster} = (Test_printers.list string_of_host slaves) ha_host_failures_to_tolerate cluster +let load_group ~__context ~group = + let placement = + match group.placement with + | "anti_affinity" -> + `anti_affinity + | _ -> + `normal + in + match + Db.VM_group.get_all ~__context + |> List.find_opt (fun g -> + Db.VM_group.get_name_label ~__context ~self:g = group.name_label + && Db.VM_group.get_placement ~__context ~self:g = placement + ) + with + | None -> + make_vm_group ~__context ~name_label:group.name_label ~placement () + | Some g -> + g + let load_vm ~__context ~(vm : vm) ~local_sr ~shared_sr ~local_net ~shared_net = let vm_ref = make_vm ~__context ~ha_always_run:vm.ha_always_run @@ -98,6 +127,12 @@ let load_vm ~__context ~(vm : vm) ~local_sr ~shared_sr ~local_net ~shared_net = ) vm.vbds in + let groups = + List.fold_left + (fun acc group -> load_group ~__context ~group :: acc) + [] vm.groups + in + Db.VM.set_groups ~__context ~self:vm_ref ~value:groups ; vm_ref let load_host ~__context ~host ~local_sr ~shared_sr ~local_net ~shared_net = From e7126362b8bf783648733fe7913145c51e3ba39f Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Thu, 4 Apr 2024 22:11:20 +0800 Subject: [PATCH 6/9] CP-48752: Update UT - vm set_resident_on host Signed-off-by: Gang Ji --- ocaml/tests/test_ha_vm_failover.ml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ocaml/tests/test_ha_vm_failover.ml b/ocaml/tests/test_ha_vm_failover.ml index 3b4a78ef0a4..f2f65fab333 100644 --- a/ocaml/tests/test_ha_vm_failover.ml +++ b/ocaml/tests/test_ha_vm_failover.ml @@ -145,7 +145,9 @@ let load_host ~__context ~host ~local_sr ~shared_sr ~local_net ~shared_net = let (_ : API.ref_VM list) = List.map (fun vm -> - load_vm ~__context ~vm ~local_sr ~shared_sr ~local_net ~shared_net + let vm_ref = load_vm ~__context ~vm ~local_sr ~shared_sr 
~local_net ~shared_net in + Db.VM.set_resident_on ~__context ~self:vm_ref ~value:host_ref ; + vm_ref ) host.vms in From 05b24d5aefc2241db09ad6d1a97bd8806fc668f6 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Thu, 18 Apr 2024 19:39:52 +0800 Subject: [PATCH 7/9] CP-48752: Update UT - add "power_state" for VM Signed-off-by: Gang Ji --- ocaml/tests/test_ha_vm_failover.ml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ocaml/tests/test_ha_vm_failover.ml b/ocaml/tests/test_ha_vm_failover.ml index f2f65fab333..6cfc580b4b3 100644 --- a/ocaml/tests/test_ha_vm_failover.ml +++ b/ocaml/tests/test_ha_vm_failover.ml @@ -37,6 +37,7 @@ type vm = { ; vbds: vbd list ; vifs: vif list ; groups: group list + ; power_state: string } let basic_vm = @@ -48,6 +49,7 @@ let basic_vm = ; vbds= [{agile= true}] ; vifs= [{agile= true}] ; groups= [] + ; power_state= "running" } type host = {memory_total: int64; name_label: string; vms: vm list} @@ -133,6 +135,8 @@ let load_vm ~__context ~(vm : vm) ~local_sr ~shared_sr ~local_net ~shared_net = [] vm.groups in Db.VM.set_groups ~__context ~self:vm_ref ~value:groups ; + if "running" = vm.power_state then + Db.VM.set_power_state ~__context ~self:vm_ref ~value:`Running ; vm_ref let load_host ~__context ~host ~local_sr ~shared_sr ~local_net ~shared_net = From fca296acbe197692f2e6115957903fe396195e9c Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Mon, 29 Apr 2024 15:59:56 +0800 Subject: [PATCH 8/9] CP-48752: Update UT - support testing pool with more than 3 hosts Add "keep_localhost" to enable test pool with more than 3 hosts. Signed-off-by: Gang Ji --- ocaml/tests/test_ha_vm_failover.ml | 61 ++++++++++++++++++------------ 1 file changed, 36 insertions(+), 25 deletions(-) diff --git a/ocaml/tests/test_ha_vm_failover.ml b/ocaml/tests/test_ha_vm_failover.ml index 6cfc580b4b3..0359d20284d 100644 --- a/ocaml/tests/test_ha_vm_failover.ml +++ b/ocaml/tests/test_ha_vm_failover.ml @@ -59,8 +59,19 @@ type pool = { ; slaves: host list ; ha_host_failures_to_tolerate: int64 ; cluster: int + ; keep_localhost: bool + (* true to get a Features.Pool_size enabled pool which can have more than 3 hosts *) } +let basic_pool = + { + master= {memory_total= gib 256L; name_label= "master"; vms= []} + ; slaves= [] + ; ha_host_failures_to_tolerate= 0L + ; cluster= 0 + ; keep_localhost= false + } + let string_of_group {name_label; placement} = Printf.sprintf "{name_label = %S; placement = %S}" name_label placement @@ -74,13 +85,14 @@ let string_of_host {memory_total; name_label; vms} = memory_total name_label (Test_printers.list string_of_vm vms) -let string_of_pool {master; slaves; ha_host_failures_to_tolerate; cluster} = +let string_of_pool + {master; slaves; ha_host_failures_to_tolerate; cluster; keep_localhost} = Printf.sprintf "{master = %s; slaves = %s; ha_host_failures_to_tolerate = %Ld; cluster = \ - %d}" + %d; keep_localhost = %B}" (string_of_host master) (Test_printers.list string_of_host slaves) - ha_host_failures_to_tolerate cluster + ha_host_failures_to_tolerate cluster keep_localhost let load_group ~__context ~group = let placement = @@ -157,13 +169,14 @@ let load_host ~__context ~host ~local_sr ~shared_sr ~local_net ~shared_net = in host_ref -let setup ~__context {master; slaves; ha_host_failures_to_tolerate; cluster} = +let setup ~__context + {master; slaves; ha_host_failures_to_tolerate; cluster; keep_localhost} = let shared_sr = make_sr ~__context ~shared:true () in let shared_net = make_network ~__context ~bridge:"xenbr0" () in - (* Remove all hosts added by make_test_database *) - 
List.iter - (fun host -> Db.Host.destroy ~__context ~self:host) - (Db.Host.get_all ~__context) ; + if not keep_localhost then (* Remove all hosts added by make_test_database *) + List.iter + (fun host -> Db.Host.destroy ~__context ~self:host) + (Db.Host.get_all ~__context) ; let load_host_and_local_resources host = let local_sr = make_sr ~__context ~shared:false () in let local_net = make_network ~__context ~bridge:"xapi0" () in @@ -225,15 +238,15 @@ module AllProtectedVms = Generic.MakeStateful (struct [ (* No VMs and a single host. *) ( { + basic_pool with master= {memory_total= gib 256L; name_label= "master"; vms= []} ; slaves= [] - ; ha_host_failures_to_tolerate= 0L - ; cluster= 0 } , [] ) ; (* One unprotected VM. *) ( { + basic_pool with master= { memory_total= gib 256L @@ -244,13 +257,12 @@ module AllProtectedVms = Generic.MakeStateful (struct ] } ; slaves= [] - ; ha_host_failures_to_tolerate= 0L - ; cluster= 0 } , [] ) ; (* One VM which would be protected if it was running. *) ( { + basic_pool with master= { memory_total= gib 256L @@ -258,23 +270,21 @@ module AllProtectedVms = Generic.MakeStateful (struct ; vms= [{basic_vm with ha_always_run= false}] } ; slaves= [] - ; ha_host_failures_to_tolerate= 0L - ; cluster= 0 } , [] ) ; (* One protected VM. *) ( { + basic_pool with master= {memory_total= gib 256L; name_label= "master"; vms= [basic_vm]} ; slaves= [] - ; ha_host_failures_to_tolerate= 0L - ; cluster= 0 } , ["vm"] ) ; (* One protected VM and one unprotected VM. *) ( { + basic_pool with master= { memory_total= gib 256L @@ -291,8 +301,6 @@ module AllProtectedVms = Generic.MakeStateful (struct ] } ; slaves= [] - ; ha_host_failures_to_tolerate= 0L - ; cluster= 0 } , ["vm1"] ) @@ -334,16 +342,17 @@ module PlanForNFailures = Generic.MakeStateful (struct [ (* Two host pool with no VMs. *) ( { + basic_pool with master= {memory_total= gib 256L; name_label= "master"; vms= []} ; slaves= [{memory_total= gib 256L; name_label= "slave"; vms= []}] ; ha_host_failures_to_tolerate= 1L - ; cluster= 0 } , Xapi_ha_vm_failover.Plan_exists_for_all_VMs ) ; (* Two host pool, with one VM taking up just under half of one host's * memory. *) ( { + basic_pool with master= { memory_total= gib 256L @@ -352,12 +361,12 @@ module PlanForNFailures = Generic.MakeStateful (struct } ; slaves= [{memory_total= gib 256L; name_label= "slave"; vms= []}] ; ha_host_failures_to_tolerate= 1L - ; cluster= 0 } , Xapi_ha_vm_failover.Plan_exists_for_all_VMs ) ; (* Two host pool, with two VMs taking up almost all of one host's memory. *) ( { + basic_pool with master= { memory_total= gib 256L @@ -370,12 +379,12 @@ module PlanForNFailures = Generic.MakeStateful (struct } ; slaves= [{memory_total= gib 256L; name_label= "slave"; vms= []}] ; ha_host_failures_to_tolerate= 1L - ; cluster= 0 } , Xapi_ha_vm_failover.Plan_exists_for_all_VMs ) ; (* Two host pool, overcommitted. *) ( { + basic_pool with master= { memory_total= gib 256L @@ -399,7 +408,6 @@ module PlanForNFailures = Generic.MakeStateful (struct } ] ; ha_host_failures_to_tolerate= 1L - ; cluster= 0 } , Xapi_ha_vm_failover.No_plan_exists ) @@ -460,6 +468,7 @@ module AssertNewVMPreservesHAPlan = Generic.MakeStateful (struct (* 2 host pool, one VM using just under half of one host's memory; * test that another VM can be added. 
*) ( ( { + basic_pool with master= { memory_total= gib 256L @@ -468,7 +477,6 @@ module AssertNewVMPreservesHAPlan = Generic.MakeStateful (struct } ; slaves= [{memory_total= gib 256L; name_label= "slave"; vms= []}] ; ha_host_failures_to_tolerate= 1L - ; cluster= 0 } , { basic_vm with @@ -483,6 +491,7 @@ module AssertNewVMPreservesHAPlan = Generic.MakeStateful (struct ; (* 2 host pool, two VMs using almost all of one host's memory; * test that another VM cannot be added. *) ( ( { + basic_pool with master= { memory_total= gib 256L @@ -495,7 +504,6 @@ module AssertNewVMPreservesHAPlan = Generic.MakeStateful (struct } ; slaves= [{memory_total= gib 256L; name_label= "slave"; vms= []}] ; ha_host_failures_to_tolerate= 1L - ; cluster= 0 } , { basic_vm with @@ -513,6 +521,7 @@ module AssertNewVMPreservesHAPlan = Generic.MakeStateful (struct ; (* 2 host pool which is already overcommitted. Attempting to add another VM * should not throw an exception. *) ( ( { + basic_pool with master= { memory_total= gib 256L @@ -532,7 +541,6 @@ module AssertNewVMPreservesHAPlan = Generic.MakeStateful (struct } ] ; ha_host_failures_to_tolerate= 1L - ; cluster= 0 } , { basic_vm with @@ -574,6 +582,7 @@ module ComputeMaxFailures = Generic.MakeStateful (struct [ (* Three host pool with no VMs. *) ( { + basic_pool with master= {memory_total= gib 256L; name_label= "master"; vms= []} ; slaves= [ @@ -589,6 +598,7 @@ module ComputeMaxFailures = Generic.MakeStateful (struct ) ; (* Two hosts pool with no VMs *) ( { + basic_pool with master= {memory_total= gib 256L; name_label= "master"; vms= []} ; slaves= [{memory_total= gib 256L; name_label= "slave1"; vms= []}] ; ha_host_failures_to_tolerate= 2L @@ -599,6 +609,7 @@ module ComputeMaxFailures = Generic.MakeStateful (struct ) ; (* Two host pool with one down *) ( { + basic_pool with master= {memory_total= gib 256L; name_label= "master"; vms= []} ; slaves= [{memory_total= gib 256L; name_label= "slave1"; vms= []}] ; ha_host_failures_to_tolerate= 2L From 96348f1be2a9da5e5cccca006b87640531e5f72e Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Mon, 29 Apr 2024 16:05:11 +0800 Subject: [PATCH 9/9] CP-48752: Add UT for host evacuation with anti-affinity support Signed-off-by: Gang Ji --- ocaml/tests/test_ha_vm_failover.ml | 899 +++++++++++++++++++++++++++-- ocaml/xapi/xapi_ha_vm_failover.mli | 58 ++ 2 files changed, 900 insertions(+), 57 deletions(-) diff --git a/ocaml/tests/test_ha_vm_failover.ml b/ocaml/tests/test_ha_vm_failover.ml index 0359d20284d..163ea0fd402 100644 --- a/ocaml/tests/test_ha_vm_failover.ml +++ b/ocaml/tests/test_ha_vm_failover.ml @@ -27,13 +27,15 @@ type vbd = {agile: bool} type vif = {agile: bool} -type group = {name_label: string; placement: string} +type placement_policy = AntiAffinity | Normal + +type group = {name_label: string; placement: placement_policy} type vm = { ha_always_run: bool ; ha_restart_priority: string ; memory: int64 - ; name_label: string + ; vm_name_label: string ; vbds: vbd list ; vifs: vif list ; groups: group list @@ -45,7 +47,7 @@ let basic_vm = ha_always_run= true ; ha_restart_priority= "restart" ; memory= gib 1L - ; name_label= "vm" + ; vm_name_label= "vm" ; vbds= [{agile= true}] ; vifs= [{agile= true}] ; groups= [] @@ -63,9 +65,62 @@ type pool = { (* true to get a Features.Pool_size enabled pool which can have more than 3 hosts *) } +let master = "master" + +let slave = "slave" + +let slave1 = "slave1" + +let slave2 = "slave2" + +let slave3 = "slave3" + +let grp1 = "grp1" + +let grp2 = "grp2" + +(** vmX_grpY: in test case for 
anti_affinity, the VM is the Xth smallest of slave1's VMs of + the same placement type in terms of VM's memory size, and it belows to VM group: grpY. *) +let vm1_grp1 = "vm1_grp1" + +let vm2_grp1 = "vm2_grp1" + +let vm3_grp1 = "vm3_grp1" + +let vm4_grp1 = "vm4_grp1" + +let vm5_grp1 = "vm5_grp1" + +let vm6_grp1 = "vm6_grp1" + +let vm8_grp1 = "vm8_grp1" + +let vm2_grp2 = "vm2_grp2" + +let vm3_grp2 = "vm3_grp2" + +let vm4_grp2 = "vm4_grp2" + +let vm5_grp2 = "vm5_grp2" + +let vm7_grp2 = "vm7_grp2" + +(** In test case for anti_affinity, it is a VM resident on host other than slave1 *) +let vm_grp1 = "vm_grp1" + +(** vmX: in test case for anti_affinity, it is a VM not in any VM group, and it is the Xth + largest of slave1's VMs not in any VM group in terms of VM's memory size. *) +let vm1 = "vm1" + +let vm2 = "vm2" + +let vm3 = "vm3" + +let vm4 = "vm4" + let basic_pool = { - master= {memory_total= gib 256L; name_label= "master"; vms= []} + master= {memory_total= gib 256L; name_label= master; vms= []} ; slaves= [] ; ha_host_failures_to_tolerate= 0L ; cluster= 0 @@ -73,11 +128,12 @@ let basic_pool = } let string_of_group {name_label; placement} = - Printf.sprintf "{name_label = %S; placement = %S}" name_label placement + Printf.sprintf "{name_label = %S; placement = %S}" name_label + (match placement with AntiAffinity -> "anti_affinity" | Normal -> "normal") -let string_of_vm {memory; name_label; groups; _} = +let string_of_vm {memory; vm_name_label; groups; _} = Printf.sprintf "{memory = %Ld; name_label = %S; groups = [%s]}" memory - name_label + vm_name_label (Test_printers.list string_of_group groups) let string_of_host {memory_total; name_label; vms} = @@ -97,9 +153,9 @@ let string_of_pool let load_group ~__context ~group = let placement = match group.placement with - | "anti_affinity" -> + | AntiAffinity -> `anti_affinity - | _ -> + | Normal -> `normal in match @@ -119,7 +175,7 @@ let load_vm ~__context ~(vm : vm) ~local_sr ~shared_sr ~local_net ~shared_net = make_vm ~__context ~ha_always_run:vm.ha_always_run ~ha_restart_priority:vm.ha_restart_priority ~memory_static_min:vm.memory ~memory_dynamic_min:vm.memory ~memory_dynamic_max:vm.memory - ~memory_static_max:vm.memory ~name_label:vm.name_label () + ~memory_static_max:vm.memory ~name_label:vm.vm_name_label () in let (_ : API.ref_VIF list) = List.mapi @@ -161,7 +217,9 @@ let load_host ~__context ~host ~local_sr ~shared_sr ~local_net ~shared_net = let (_ : API.ref_VM list) = List.map (fun vm -> - let vm_ref = load_vm ~__context ~vm ~local_sr ~shared_sr ~local_net ~shared_net in + let vm_ref = + load_vm ~__context ~vm ~local_sr ~shared_sr ~local_net ~shared_net + in Db.VM.set_resident_on ~__context ~self:vm_ref ~value:host_ref ; vm_ref ) @@ -239,7 +297,7 @@ module AllProtectedVms = Generic.MakeStateful (struct (* No VMs and a single host. 
*) ( { basic_pool with - master= {memory_total= gib 256L; name_label= "master"; vms= []} + master= {memory_total= gib 256L; name_label= master; vms= []} ; slaves= [] } , [] @@ -250,7 +308,7 @@ module AllProtectedVms = Generic.MakeStateful (struct master= { memory_total= gib 256L - ; name_label= "master" + ; name_label= master ; vms= [ {basic_vm with ha_always_run= false; ha_restart_priority= ""} @@ -266,7 +324,7 @@ module AllProtectedVms = Generic.MakeStateful (struct master= { memory_total= gib 256L - ; name_label= "master" + ; name_label= master ; vms= [{basic_vm with ha_always_run= false}] } ; slaves= [] @@ -277,7 +335,7 @@ module AllProtectedVms = Generic.MakeStateful (struct ( { basic_pool with master= - {memory_total= gib 256L; name_label= "master"; vms= [basic_vm]} + {memory_total= gib 256L; name_label= master; vms= [basic_vm]} ; slaves= [] } , ["vm"] @@ -288,21 +346,21 @@ module AllProtectedVms = Generic.MakeStateful (struct master= { memory_total= gib 256L - ; name_label= "master" + ; name_label= master ; vms= [ - {basic_vm with name_label= "vm1"} + {basic_vm with vm_name_label= vm1} ; { basic_vm with ha_always_run= false ; ha_restart_priority= "" - ; name_label= "vm2" + ; vm_name_label= vm2 } ] } ; slaves= [] } - , ["vm1"] + , [vm1] ) ] end) @@ -343,8 +401,8 @@ module PlanForNFailures = Generic.MakeStateful (struct (* Two host pool with no VMs. *) ( { basic_pool with - master= {memory_total= gib 256L; name_label= "master"; vms= []} - ; slaves= [{memory_total= gib 256L; name_label= "slave"; vms= []}] + master= {memory_total= gib 256L; name_label= master; vms= []} + ; slaves= [{memory_total= gib 256L; name_label= slave; vms= []}] ; ha_host_failures_to_tolerate= 1L } , Xapi_ha_vm_failover.Plan_exists_for_all_VMs @@ -356,10 +414,10 @@ module PlanForNFailures = Generic.MakeStateful (struct master= { memory_total= gib 256L - ; name_label= "master" - ; vms= [{basic_vm with memory= gib 120L; name_label= "vm1"}] + ; name_label= master + ; vms= [{basic_vm with memory= gib 120L; vm_name_label= vm1}] } - ; slaves= [{memory_total= gib 256L; name_label= "slave"; vms= []}] + ; slaves= [{memory_total= gib 256L; name_label= slave; vms= []}] ; ha_host_failures_to_tolerate= 1L } , Xapi_ha_vm_failover.Plan_exists_for_all_VMs @@ -370,14 +428,14 @@ module PlanForNFailures = Generic.MakeStateful (struct master= { memory_total= gib 256L - ; name_label= "master" + ; name_label= master ; vms= [ - {basic_vm with memory= gib 120L; name_label= "vm1"} - ; {basic_vm with memory= gib 120L; name_label= "vm2"} + {basic_vm with memory= gib 120L; vm_name_label= vm1} + ; {basic_vm with memory= gib 120L; vm_name_label= vm2} ] } - ; slaves= [{memory_total= gib 256L; name_label= "slave"; vms= []}] + ; slaves= [{memory_total= gib 256L; name_label= slave; vms= []}] ; ha_host_failures_to_tolerate= 1L } , Xapi_ha_vm_failover.Plan_exists_for_all_VMs @@ -388,22 +446,22 @@ module PlanForNFailures = Generic.MakeStateful (struct master= { memory_total= gib 256L - ; name_label= "master" + ; name_label= master ; vms= [ - {basic_vm with memory= gib 120L; name_label= "vm1"} - ; {basic_vm with memory= gib 120L; name_label= "vm2"} + {basic_vm with memory= gib 120L; vm_name_label= vm1} + ; {basic_vm with memory= gib 120L; vm_name_label= vm2} ] } ; slaves= [ { memory_total= gib 256L - ; name_label= "slave" + ; name_label= slave ; vms= [ - {basic_vm with memory= gib 120L; name_label= "vm3"} - ; {basic_vm with memory= gib 120L; name_label= "vm4"} + {basic_vm with memory= gib 120L; vm_name_label= vm3} + ; {basic_vm with memory= gib 120L; 
vm_name_label= vm4} ] } ] @@ -472,10 +530,10 @@ module AssertNewVMPreservesHAPlan = Generic.MakeStateful (struct master= { memory_total= gib 256L - ; name_label= "master" - ; vms= [{basic_vm with memory= gib 120L; name_label= "vm1"}] + ; name_label= master + ; vms= [{basic_vm with memory= gib 120L; vm_name_label= vm1}] } - ; slaves= [{memory_total= gib 256L; name_label= "slave"; vms= []}] + ; slaves= [{memory_total= gib 256L; name_label= slave; vms= []}] ; ha_host_failures_to_tolerate= 1L } , { @@ -483,7 +541,7 @@ module AssertNewVMPreservesHAPlan = Generic.MakeStateful (struct ha_always_run= false ; ha_restart_priority= "restart" ; memory= gib 120L - ; name_label= "vm2" + ; vm_name_label= vm2 } ) , Ok () @@ -495,14 +553,14 @@ module AssertNewVMPreservesHAPlan = Generic.MakeStateful (struct master= { memory_total= gib 256L - ; name_label= "master" + ; name_label= master ; vms= [ - {basic_vm with memory= gib 120L; name_label= "vm1"} - ; {basic_vm with memory= gib 120L; name_label= "vm2"} + {basic_vm with memory= gib 120L; vm_name_label= vm1} + ; {basic_vm with memory= gib 120L; vm_name_label= vm2} ] } - ; slaves= [{memory_total= gib 256L; name_label= "slave"; vms= []}] + ; slaves= [{memory_total= gib 256L; name_label= slave; vms= []}] ; ha_host_failures_to_tolerate= 1L } , { @@ -510,7 +568,7 @@ module AssertNewVMPreservesHAPlan = Generic.MakeStateful (struct ha_always_run= false ; ha_restart_priority= "restart" ; memory= gib 120L - ; name_label= "vm2" + ; vm_name_label= vm2 } ) , Error @@ -525,19 +583,19 @@ module AssertNewVMPreservesHAPlan = Generic.MakeStateful (struct master= { memory_total= gib 256L - ; name_label= "master" + ; name_label= master ; vms= [ - {basic_vm with memory= gib 120L; name_label= "vm1"} - ; {basic_vm with memory= gib 120L; name_label= "vm2"} + {basic_vm with memory= gib 120L; vm_name_label= vm1} + ; {basic_vm with memory= gib 120L; vm_name_label= vm2} ] } ; slaves= [ { memory_total= gib 256L - ; name_label= "slave" - ; vms= [{basic_vm with memory= gib 120L; name_label= "vm1"}] + ; name_label= slave + ; vms= [{basic_vm with memory= gib 120L; vm_name_label= vm1}] } ] ; ha_host_failures_to_tolerate= 1L @@ -547,7 +605,7 @@ module AssertNewVMPreservesHAPlan = Generic.MakeStateful (struct ha_always_run= false ; ha_restart_priority= "restart" ; memory= gib 120L - ; name_label= "vm2" + ; vm_name_label= vm2 } ) , Ok () @@ -583,11 +641,11 @@ module ComputeMaxFailures = Generic.MakeStateful (struct (* Three host pool with no VMs. 
*) ( { basic_pool with - master= {memory_total= gib 256L; name_label= "master"; vms= []} + master= {memory_total= gib 256L; name_label= master; vms= []} ; slaves= [ - {memory_total= gib 256L; name_label= "slave1"; vms= []} - ; {memory_total= gib 256L; name_label= "slave2"; vms= []} + {memory_total= gib 256L; name_label= slave1; vms= []} + ; {memory_total= gib 256L; name_label= slave2; vms= []} ] ; (* Placeholder value that is overridden when we call the compute function *) ha_host_failures_to_tolerate= 3L @@ -599,8 +657,8 @@ module ComputeMaxFailures = Generic.MakeStateful (struct ; (* Two hosts pool with no VMs *) ( { basic_pool with - master= {memory_total= gib 256L; name_label= "master"; vms= []} - ; slaves= [{memory_total= gib 256L; name_label= "slave1"; vms= []}] + master= {memory_total= gib 256L; name_label= master; vms= []} + ; slaves= [{memory_total= gib 256L; name_label= slave1; vms= []}] ; ha_host_failures_to_tolerate= 2L ; cluster= 2 } @@ -610,8 +668,8 @@ module ComputeMaxFailures = Generic.MakeStateful (struct ; (* Two host pool with one down *) ( { basic_pool with - master= {memory_total= gib 256L; name_label= "master"; vms= []} - ; slaves= [{memory_total= gib 256L; name_label= "slave1"; vms= []}] + master= {memory_total= gib 256L; name_label= master; vms= []} + ; slaves= [{memory_total= gib 256L; name_label= slave1; vms= []}] ; ha_host_failures_to_tolerate= 2L ; cluster= 1 } @@ -621,4 +679,731 @@ module ComputeMaxFailures = Generic.MakeStateful (struct ] end) -let tests = [("plan_for_n_failures", PlanForNFailures.tests)] +let extract_output_for_anti_affinity_plan __context anti_affinity_plan = + let slv1 = + Db.Host.get_all ~__context + |> List.find (fun self -> Db.Host.get_name_label ~__context ~self = slave1) + in + let slave1_anti_affinity_vms = + Db.Host.get_resident_VMs ~__context ~self:slv1 + |> List.map (fun self -> (self, Db.VM.get_record ~__context ~self)) + |> List.filter (fun (_, record) -> not record.API.vM_is_control_domain) + |> List.map (fun (self, record) -> + (self, Xapi_ha_vm_failover.vm_memory ~__context record) + ) + |> Xapi_ha_vm_failover.anti_affinity_vms_increasing ~__context + in + try + anti_affinity_plan ~__context slave1_anti_affinity_vms [] + |> List.map (fun (vm, host) -> + ( Db.VM.get_name_label ~__context ~self:vm + , Db.Host.get_name_label ~__context ~self:host + ) + ) + with Api_errors.Server_error ("NO_HOSTS_AVAILABLE", []) as e -> + [("Anti-affinity VMs plan failed", Printexc.to_string e)] + +let anti_affinity_grp1 = {name_label= grp1; placement= AntiAffinity} + +let anti_affinity_plan_test_cases = + [ + (* Test 0: No VMs in slave1 to be evacuated. 
*) + ( { + basic_pool with + master= {memory_total= gib 256L; name_label= master; vms= []} + ; slaves= + [ + {memory_total= gib 256L; name_label= slave1; vms= []} + ; {memory_total= gib 256L; name_label= slave2; vms= []} + ] + } + , (* Assert that spread_evenly_plan returns as expected *) + [] + , (* Assert that no_breach_plan returns as expected *) + [] + ) + ; (* Test 1: No anti-affinity VMs in slave1 to be evacuated *) + ( { + basic_pool with + master= {memory_total= gib 256L; name_label= master; vms= []} + ; slaves= + [ + { + memory_total= gib 256L + ; name_label= slave1 + ; vms= + [ + {basic_vm with memory= gib 120L; vm_name_label= vm1} + ; {basic_vm with memory= gib 120L; vm_name_label= vm2} + ] + } + ; {memory_total= gib 256L; name_label= slave2; vms= []} + ] + } + , (* Assert that spread_evenly_plan returns as expected *) + [] + , (* Assert that no_breach_plan returns as expected *) + [] + ) + ; (* Test 2: One anti-affinity VM in slave1 to be evacuated *) + ( { + basic_pool with + master= {memory_total= gib 512L; name_label= master; vms= []} + ; slaves= + [ + { + memory_total= gib 256L + ; name_label= slave1 + ; vms= + [ + { + basic_vm with + memory= gib 120L + ; vm_name_label= vm1_grp1 + ; groups= [anti_affinity_grp1] + } + ; {basic_vm with memory= gib 120L; vm_name_label= vm1} + ] + } + ; {memory_total= gib 256L; name_label= slave2; vms= []} + ] + } + , (* Assert that spread_evenly_plan returns as expected *) + [(vm1_grp1, slave2)] + , (* Assert that no_breach_plan returns as expected *) + [(vm1_grp1, slave2)] + ) + ; (* Test 3: One anti-affinity VM in slave1 to be evacuated, the smallest host already has anti-affinity VM in the same group *) + ( { + basic_pool with + master= {memory_total= gib 512L; name_label= master; vms= []} + ; slaves= + [ + { + memory_total= gib 256L + ; name_label= slave1 + ; vms= + [ + { + basic_vm with + memory= gib 120L + ; vm_name_label= vm1_grp1 + ; groups= [anti_affinity_grp1] + } + ; {basic_vm with memory= gib 120L; vm_name_label= "vm2"} + ] + } + ; { + memory_total= gib 256L + ; name_label= slave2 + ; vms= + [ + { + basic_vm with + memory= gib 120L + ; vm_name_label= vm_grp1 + ; groups= [anti_affinity_grp1] + } + ] + } + ] + } + , (* Assert that spread_evenly_plan returns as expected *) + [(vm1_grp1, master)] + , (* Assert that no_breach_plan returns as expected *) + [(vm1_grp1, master)] + ) + ; (* Test 4: Two anti-affinity VMs belong to one group in slave1 to be evacuated *) + ( { + basic_pool with + master= {memory_total= gib 512L; name_label= master; vms= []} + ; slaves= + [ + { + memory_total= gib 256L + ; name_label= slave1 + ; vms= + [ + { + basic_vm with + memory= gib 120L + ; vm_name_label= vm1_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 130L + ; vm_name_label= vm2_grp1 + ; groups= [anti_affinity_grp1] + } + ] + } + ; {memory_total= gib 256L; name_label= slave2; vms= []} + ] + } + , (* Assert that spread_evenly_plan returns as expected *) + [(vm2_grp1, master); (vm1_grp1, slave2)] + , (* Assert that no_breach_plan returns as expected *) + [(vm2_grp1, master); (vm1_grp1, slave2)] + ) + ; (* Test 5: Two anti-affinity VMs belong to one group in slave1 to be evacuated, only 1 can be planed *) + ( { + basic_pool with + master= {memory_total= gib 512L; name_label= master; vms= []} + ; slaves= + [ + { + memory_total= gib 256L + ; name_label= slave1 + ; vms= + [ + { + basic_vm with + memory= gib 120L + ; vm_name_label= vm1_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 513L + ; 
vm_name_label= vm2_grp1 + ; groups= [anti_affinity_grp1] + } + ] + } + ; {memory_total= gib 256L; name_label= slave2; vms= []} + ] + } + , (* Assert that spread_evenly_plan returns as expected *) + [ + ( "Anti-affinity VMs plan failed" + , "Server_error(NO_HOSTS_AVAILABLE, [ ])" + ) + ] + , (* Assert that no_breach_plan returns as expected *) + [(vm1_grp1, slave2)] + ) + ; (* Test 6: 6 anti-affinity VMs belong to one group in slave1 to be evacuated, only 5 can be planned *) + ( { + basic_pool with + master= {memory_total= gib 640L; name_label= master; vms= []} + ; slaves= + [ + { + memory_total= gib 256L + ; name_label= slave1 + ; vms= + [ + { + basic_vm with + memory= gib 120L + ; vm_name_label= vm2_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 60L + ; vm_name_label= vm1_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 400L + ; vm_name_label= vm6_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 250L + ; vm_name_label= vm4_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 260L + ; vm_name_label= vm5_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 130L + ; vm_name_label= vm3_grp1 + ; groups= [anti_affinity_grp1] + } + ] + } + ; {memory_total= gib 256L; name_label= slave2; vms= []} + ] + } + , (* Assert that spread_evenly_plan returns as expected *) + [ + ( "Anti-affinity VMs plan failed" + , "Server_error(NO_HOSTS_AVAILABLE, [ ])" + ) + ] + , (* Assert that no_breach_plan returns as expected *) + [(vm2_grp1, master); (vm1_grp1, slave2)] + ) + ; (* Test 7: Two groups anti-affinity VMs in slave1 to be evacuated *) + ( { + basic_pool with + master= {memory_total= gib 512L; name_label= master; vms= []} + ; slaves= + [ + { + memory_total= gib 256L + ; name_label= slave1 + ; vms= + [ + { + basic_vm with + memory= gib 120L + ; vm_name_label= vm6_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 60L + ; vm_name_label= vm5_grp2 + ; groups= [{name_label= grp2; placement= AntiAffinity}] + } + ; { + basic_vm with + memory= gib 130L + ; vm_name_label= vm7_grp2 + ; groups= [{name_label= grp2; placement= AntiAffinity}] + } + ; { + basic_vm with + memory= gib 1L + ; vm_name_label= vm1_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 2L + ; vm_name_label= vm2_grp2 + ; groups= [{name_label= grp2; placement= AntiAffinity}] + } + ; { + basic_vm with + memory= gib 3L + ; vm_name_label= vm3_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 4L + ; vm_name_label= vm4_grp2 + ; groups= [{name_label= grp2; placement= AntiAffinity}] + } + ] + } + ; {memory_total= gib 256L; name_label= slave2; vms= []} + ] + } + , (* Assert that spread_evenly_plan returns as expected *) + [ + (vm7_grp2, master) + ; (vm6_grp1, slave2) + ; (vm5_grp2, slave2) + ; (vm4_grp2, master) + ; (vm3_grp1, master) + ; (vm2_grp2, slave2) + ; (vm1_grp1, slave2) + ] + , (* Assert that no_breach_plan returns as expected *) + [ + (vm4_grp2, master) + ; (vm3_grp1, master) + ; (vm2_grp2, slave2) + ; (vm1_grp1, slave2) + ] + ) + ; (* Test 8: Two groups anti-affinity VMs in slave1 to be evacuated, master is bigger than slave2 in size when started, but becomes smaller during planning *) + ( { + basic_pool with + master= {memory_total= gib 512L; name_label= master; vms= []} + ; slaves= + [ + { + memory_total= gib 256L + ; name_label= slave1 + ; vms= + [ + { + basic_vm with + memory= gib 120L + ; vm_name_label= vm6_grp1 + ; groups= 
[anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 60L + ; vm_name_label= vm5_grp2 + ; groups= [{name_label= grp2; placement= AntiAffinity}] + } + ; { + basic_vm with + memory= gib 130L + ; vm_name_label= vm7_grp2 + ; groups= [{name_label= grp2; placement= AntiAffinity}] + } + ; { + basic_vm with + memory= gib 1L + ; vm_name_label= vm1_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 6L + ; vm_name_label= vm3_grp2 + ; groups= [{name_label= grp2; placement= AntiAffinity}] + } + ; { + basic_vm with + memory= gib 5L + ; vm_name_label= vm2_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 7L + ; vm_name_label= vm4_grp2 + ; groups= [{name_label= grp2; placement= AntiAffinity}] + } + ] + } + ; {memory_total= gib 510L; name_label= slave2; vms= []} + ] + } + , (* Assert that spread_evenly_plan returns as expected *) + [ + (vm7_grp2, slave2) + ; (vm6_grp1, master) + ; (vm5_grp2, master) + ; (vm4_grp2, slave2) + ; (vm3_grp2, master) + ; (vm2_grp1, master) + ; (vm1_grp1, slave2) + ] + , (* Assert that no_breach_plan returns as expected *) + [ + (vm4_grp2, slave2) + ; (vm3_grp2, master) + ; (vm2_grp1, master) + ; (vm1_grp1, slave2) + ] + ) + ] + +module Slave1EvacuationVMAntiAffinitySpreadEvenlyPlan = +Generic.MakeStateful (struct + module Io = struct + type input_t = pool + + type output_t = (string * string) list + + let string_of_input_t = string_of_pool + + let string_of_output_t = Test_printers.(list (pair string string)) + end + + module State = Test_state.XapiDb + + let load_input __context = setup ~__context + + let extract_output __context _ = + let slv1 = + Db.Host.get_all ~__context + |> List.find (fun self -> Db.Host.get_name_label ~__context ~self = slave1) + in + let hosts = + Db.Host.get_all ~__context + |> List.filter (fun h -> + h <> slv1 + && Db.Host.get_name_label ~__context ~self:h <> "localhost" + ) + |> List.map (fun host -> + (host, Xapi_ha_vm_failover.host_free_memory ~__context ~host) + ) + in + let pool_state = + Xapi_ha_vm_failover.init_spread_evenly_plan_pool_state ~__context hosts + in + extract_output_for_anti_affinity_plan __context + (Xapi_ha_vm_failover.compute_spread_evenly_plan pool_state) + + let tests = + `QuickAndAutoDocumented + (anti_affinity_plan_test_cases + |> List.map (fun (pool, spread_evenly_plan, _no_breach_plan) -> + (pool, spread_evenly_plan) + ) + ) +end) + +module Slave1EvacuationVMAntiAffinityNoBreachPlan = Generic.MakeStateful (struct + module Io = struct + type input_t = pool + + type output_t = (string * string) list + + let string_of_input_t = string_of_pool + + let string_of_output_t = Test_printers.(list (pair string string)) + end + + module State = Test_state.XapiDb + + let load_input __context = setup ~__context + + let extract_output __context _ = + let total_hosts = + Db.Host.get_all ~__context + |> List.filter (fun h -> + Db.Host.get_name_label ~__context ~self:h <> "localhost" + ) + |> List.length + in + let slv1 = + Db.Host.get_all ~__context + |> List.find (fun self -> Db.Host.get_name_label ~__context ~self = slave1) + in + let hosts = + Db.Host.get_all ~__context + |> List.filter (fun h -> + h <> slv1 + && Db.Host.get_name_label ~__context ~self:h <> "localhost" + ) + |> List.map (fun host -> + (host, Xapi_ha_vm_failover.host_free_memory ~__context ~host) + ) + in + let pool_state = + Xapi_ha_vm_failover.init_spread_evenly_plan_pool_state ~__context hosts + |> Xapi_ha_vm_failover.init_no_breach_plan_pool_state + in + extract_output_for_anti_affinity_plan __context + 
(Xapi_ha_vm_failover.compute_no_breach_plan total_hosts pool_state) + + let tests = + `QuickAndAutoDocumented + (anti_affinity_plan_test_cases + |> List.map (fun (pool, _spread_evenly_plan, no_breach_plan) -> + (pool, no_breach_plan) + ) + ) +end) + +module Slave1EvacuationPlan = Generic.MakeStateful (struct + module Io = struct + type input_t = pool + + type output_t = (string * string) list + + let string_of_input_t = string_of_pool + + let string_of_output_t = Test_printers.(list (pair string string)) + end + + module State = Test_state.XapiDb + + let load_input __context = setup ~__context + + let extract_output __context _ = + let all_hosts = + Db.Host.get_all ~__context + |> List.filter (fun h -> + Db.Host.get_name_label ~__context ~self:h <> "localhost" + ) + in + let slv1 = + Db.Host.get_all ~__context + |> List.find (fun self -> Db.Host.get_name_label ~__context ~self = slave1) + in + let slave1_vms = + Db.Host.get_resident_VMs ~__context ~self:slv1 + |> List.map (fun self -> (self, Db.VM.get_record ~__context ~self)) + |> List.filter (fun (_, record) -> not record.API.vM_is_control_domain) + |> List.map (fun (self, record) -> + (self, Xapi_ha_vm_failover.vm_memory ~__context record) + ) + in + let hosts = + all_hosts + |> List.filter (( <> ) slv1) + |> List.map (fun host -> + (host, Xapi_ha_vm_failover.host_free_memory ~__context ~host) + ) + in + Xapi_ha_vm_failover.compute_anti_affinity_evacuation_plan ~__context + (List.length all_hosts) hosts slave1_vms + |> List.map (fun (vm, host) -> + ( Db.VM.get_name_label ~__context ~self:vm + , Db.Host.get_name_label ~__context ~self:host + ) + ) + + let tests = + `QuickAndAutoDocumented + [ + (* Test 0: Spread evenly plan is taken. *) + ( { + basic_pool with + master= {memory_total= gib 200L; name_label= master; vms= []} + ; slaves= + [ + { + memory_total= gib 256L + ; name_label= slave1 + ; vms= + [ + { + basic_vm with + memory= gib 24L + ; vm_name_label= vm4_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 23L + ; vm_name_label= vm3_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 22L + ; vm_name_label= vm2_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 21L + ; vm_name_label= vm1_grp1 + ; groups= [anti_affinity_grp1] + } + ] + } + ; {memory_total= gib 60L; name_label= slave2; vms= []} + ] + } + , (* Assert that spread_evenly_plan is taken. *) + [ + (vm4_grp1, master) + ; (vm3_grp1, slave2) + ; (vm2_grp1, master) + ; (vm1_grp1, slave2) + ] + ) + (* Test 1: No breach plan is taken. *) + ; ( { + basic_pool with + master= {memory_total= gib 100L; name_label= master; vms= []} + ; slaves= + [ + { + memory_total= gib 256L + ; name_label= slave1 + ; vms= + [ + {basic_vm with memory= gib 85L; vm_name_label= vm1} + ; {basic_vm with memory= gib 65L; vm_name_label= vm2} + ; { + basic_vm with + memory= gib 30L + ; vm_name_label= vm3_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 20L + ; vm_name_label= vm2_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 10L + ; vm_name_label= vm1_grp1 + ; groups= [anti_affinity_grp1] + } + ] + } + ; {memory_total= gib 90L; name_label= slave2; vms= []} + ; {memory_total= gib 70L; name_label= slave3; vms= []} + ] + ; keep_localhost= true + } + , (* Assert that no-breach-plan is taken *) + [ + (vm2_grp1, slave2) + ; (vm1_grp1, slave3) + ; (vm3_grp1, slave3) + ; (vm2, slave2) + ; (vm1, master) + ] + ) + (* Test 2: Fallback to binpack plan. 
*) + ; ( { + basic_pool with + master= {memory_total= gib 100L; name_label= master; vms= []} + ; slaves= + [ + { + memory_total= gib 256L + ; name_label= slave1 + ; vms= + [ + {basic_vm with memory= gib 95L; vm_name_label= vm1} + ; {basic_vm with memory= gib 75L; vm_name_label= vm2} + ; { + basic_vm with + memory= gib 30L + ; vm_name_label= vm3_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 20L + ; vm_name_label= vm2_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 10L + ; vm_name_label= vm1_grp1 + ; groups= [anti_affinity_grp1] + } + ] + } + ; {memory_total= gib 80L; name_label= slave2; vms= []} + ; {memory_total= gib 70L; name_label= slave3; vms= []} + ] + ; keep_localhost= true + } + , (* Assert that binpack-plan is taken *) + [ + (vm1_grp1, slave3) + ; (vm2_grp1, slave3) + ; (vm3_grp1, slave3) + ; (vm2, slave2) + ; (vm1, master) + ] + ) + ] +end) + +let tests = + [ + ("plan_for_n_failures", PlanForNFailures.tests) + ; ( "anti-affinity spread evenly plan" + , Slave1EvacuationVMAntiAffinitySpreadEvenlyPlan.tests + ) + ; ( "anti-affinity no breach plan" + , Slave1EvacuationVMAntiAffinityNoBreachPlan.tests + ) + ; ( "3 phases plan: spread evenly plan, no breach plan, binpacking plan" + , Slave1EvacuationPlan.tests + ) + ] diff --git a/ocaml/xapi/xapi_ha_vm_failover.mli b/ocaml/xapi/xapi_ha_vm_failover.mli index 89a4c3d20e5..52e2382164f 100644 --- a/ocaml/xapi/xapi_ha_vm_failover.mli +++ b/ocaml/xapi/xapi_ha_vm_failover.mli @@ -86,3 +86,61 @@ val assert_nfailures_change_preserves_ha_plan : __context:Context.t -> int -> unit val assert_new_vm_preserves_ha_plan : __context:Context.t -> API.ref_VM -> unit + +(* Below exposed only for ease of testing *) + +module VMGroupMap : Map.S with type key = [`VM_group] Ref.t + +module HostKey : sig + type t = [`host] Ref.t +end + +module AntiAffinityEvacuationPlanHostQueue : Psq.S with type k = HostKey.t + +type spread_evenly_plan_pool_state = + AntiAffinityEvacuationPlanHostQueue.t VMGroupMap.t + +type no_breach_plan_pool_state = { + grp_no_resident_hosts_queue: + AntiAffinityEvacuationPlanHostQueue.t VMGroupMap.t + ; grp_resident_hosts_count_map: int VMGroupMap.t +} + +val compute_spread_evenly_plan : + __context:Context.t + -> AntiAffinityEvacuationPlanHostQueue.t VMGroupMap.t + -> (API.ref_VM * int64 * API.ref_VM_group) list + -> (API.ref_VM * API.ref_host) list + -> (API.ref_VM * API.ref_host) list + +val compute_no_breach_plan : + __context:Context.t + -> int + -> no_breach_plan_pool_state + -> (API.ref_VM * int64 * API.ref_VM_group) list + -> (API.ref_VM * API.ref_host) list + -> (API.ref_VM * API.ref_host) list + +val compute_anti_affinity_evacuation_plan : + __context:Context.t + -> int + -> (API.ref_host * int64) list + -> (API.ref_VM * int64) list + -> (API.ref_VM * API.ref_host) list + +val host_free_memory : __context:Context.t -> host:[`host] Ref.t -> int64 + +val vm_memory : __context:Context.t -> API.vM_t -> int64 + +val anti_affinity_vms_increasing : + __context:Context.t + -> (API.ref_VM * 'a) list + -> (API.ref_VM * 'a * API.ref_VM_group) list + +val init_spread_evenly_plan_pool_state : + __context:Context.t + -> (API.ref_host * int64) list + -> AntiAffinityEvacuationPlanHostQueue.t VMGroupMap.t + +val init_no_breach_plan_pool_state : + spread_evenly_plan_pool_state -> no_breach_plan_pool_state
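
For orientation, the values exported at the end of xapi_ha_vm_failover.mli are driven by
the Slave1EvacuationPlan test above in a fixed order: build a (host, free memory) list for
the candidate hosts and a (VM, required memory) list for the host being evacuated, then
hand both to compute_anti_affinity_evacuation_plan, which (per the test labels) tries the
spread-evenly plan first, falls back to the no-breach plan, and finally to plain
binpacking. Below is a minimal sketch of that composition, not part of the patch; the
function name evacuation_plan_sketch and the host_to_evacuate parameter are illustrative
only, and the Db calls mirror the ones already used by the tests:

  (* Sketch: compute an anti-affinity-aware evacuation plan for one host,
     following the same steps as Slave1EvacuationPlan.extract_output. *)
  let evacuation_plan_sketch ~__context ~host_to_evacuate =
    let all_hosts = Db.Host.get_all ~__context in
    (* VMs resident on the host being evacuated, paired with the memory each needs;
       control domains are filtered out, as in the tests. *)
    let vms =
      Db.Host.get_resident_VMs ~__context ~self:host_to_evacuate
      |> List.map (fun self -> (self, Db.VM.get_record ~__context ~self))
      |> List.filter (fun (_, r) -> not r.API.vM_is_control_domain)
      |> List.map (fun (self, r) ->
             (self, Xapi_ha_vm_failover.vm_memory ~__context r)
         )
    in
    (* Candidate hosts (everything except the host being evacuated) paired with
       their free memory. *)
    let hosts =
      all_hosts
      |> List.filter (( <> ) host_to_evacuate)
      |> List.map (fun host ->
             (host, Xapi_ha_vm_failover.host_free_memory ~__context ~host)
         )
    in
    (* Three-phase plan: spread evenly, then no breach, then binpacking. *)
    Xapi_ha_vm_failover.compute_anti_affinity_evacuation_plan ~__context
      (List.length all_hosts) hosts vms

As the "(* Below exposed only for ease of testing *)" comment states, these exports exist
so the per-phase entry points (init_spread_evenly_plan_pool_state, compute_spread_evenly_plan,
init_no_breach_plan_pool_state, compute_no_breach_plan) can be exercised in isolation by the
Slave1EvacuationVMAntiAffinity* modules; the sketch mirrors how the tests drive them rather
than describing a supported public API.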